From 0f60f67742910ea97fbaad368cd075b46e719525 Mon Sep 17 00:00:00 2001 From: Caren Thomas Date: Fri, 24 Oct 2025 15:15:51 -0700 Subject: [PATCH] remove docs --- fern/assets/leaderboard.js | 152 ---- fern/pages/ade-guide/simulator.mdx | 75 -- fern/pages/advanced/custom_memory.mdx | 75 -- fern/pages/agents/archival_best_practices.mdx | 383 -------- .../pages/agents/archival_memory_overview.mdx | 196 ---- fern/pages/agents/archival_search.mdx | 221 ----- fern/pages/agents/base_tools.mdx | 150 ---- fern/pages/agents/fetch_webpage.mdx | 166 ---- fern/pages/agents/human_in_the_loop.mdx | 690 --------------- fern/pages/agents/json_mode.mdx | 468 ---------- fern/pages/agents/memgpt_agents.mdx | 217 ----- fern/pages/agents/memory_blocks.mdx | 415 --------- fern/pages/agents/message_types.mdx | 459 ---------- fern/pages/agents/overview.mdx | 279 ------ fern/pages/agents/prebuilt_tools.mdx | 94 -- fern/pages/agents/run_code.mdx | 258 ------ .../agents/shared-memory-blocks-guide.mdx | 837 ------------------ fern/pages/agents/sleep_time_agents.mdx | 125 --- fern/pages/agents/tool_rules.mdx | 91 -- fern/pages/agents/web_search.mdx | 342 ------- fern/pages/cookbooks_simple.mdx | 274 ------ fern/pages/deployment/railway.mdx | 85 -- fern/pages/deployment/telemetry.mdx | 54 -- fern/pages/evals/advanced/custom-graders.mdx | 66 -- .../advanced/multi-turn-conversations.mdx | 94 -- fern/pages/evals/cli/commands.mdx | 342 ------- fern/pages/evals/concepts/datasets.mdx | 425 --------- fern/pages/evals/concepts/extractors.mdx | 374 -------- fern/pages/evals/concepts/gates.mdx | 384 -------- fern/pages/evals/concepts/graders.mdx | 330 ------- fern/pages/evals/concepts/overview.mdx | 207 ----- fern/pages/evals/concepts/suites.mdx | 275 ------ fern/pages/evals/concepts/targets.mdx | 329 ------- fern/pages/evals/configuration/suite-yaml.mdx | 427 --------- fern/pages/evals/extractors/builtin.mdx | 96 -- fern/pages/evals/extractors/custom.mdx | 55 -- fern/pages/evals/getting-started.mdx | 264 
------ fern/pages/evals/graders/multi-metric.mdx | 58 -- fern/pages/evals/graders/rubric-graders.mdx | 82 -- fern/pages/evals/graders/tool-graders.mdx | 85 -- fern/pages/evals/overview.mdx | 47 - fern/pages/evals/results/overview.mdx | 484 ---------- fern/pages/evals/troubleshooting.mdx | 267 ------ fern/pages/getting-started/prompts.mdx | 535 ----------- fern/pages/getting-started/quickstart.mdx | 228 ----- fern/pages/legacy/architectures_overview.mdx | 83 -- fern/pages/legacy/heartbeats_legacy.mdx | 51 -- .../legacy/low_latency_agents_legacy.mdx | 94 -- fern/pages/legacy/memgpt_agents_legacy.mdx | 174 ---- fern/pages/legacy/migration_guide.mdx | 347 -------- fern/pages/legacy/naming_history.mdx | 74 -- fern/pages/legacy/react_agents_legacy.mdx | 74 -- fern/pages/legacy/workflows_legacy.mdx | 142 --- fern/pages/selfhosting/overview.mdx | 155 ---- .../tutorials/attaching_detaching_blocks.mdx | 517 ----------- fern/pages/tutorials/hello_world.mdx | 430 --------- fern/pages/tutorials/pdf_chat.mdx | 638 ------------- fern/pages/tutorials/shared-memory-blocks.mdx | 553 ------------ fern/pages/voice/voice.mdx | 35 - fern/pages/voice/voice_livekit.mdx | 118 --- fern/pages/voice/voice_vapi.mdx | 66 -- fern/scripts/prepare-openapi.ts | 218 ----- 62 files changed, 15329 deletions(-) delete mode 100644 fern/assets/leaderboard.js delete mode 100644 fern/pages/ade-guide/simulator.mdx delete mode 100644 fern/pages/advanced/custom_memory.mdx delete mode 100644 fern/pages/agents/archival_best_practices.mdx delete mode 100644 fern/pages/agents/archival_memory_overview.mdx delete mode 100644 fern/pages/agents/archival_search.mdx delete mode 100644 fern/pages/agents/base_tools.mdx delete mode 100644 fern/pages/agents/fetch_webpage.mdx delete mode 100644 fern/pages/agents/human_in_the_loop.mdx delete mode 100644 fern/pages/agents/json_mode.mdx delete mode 100644 fern/pages/agents/memgpt_agents.mdx delete mode 100644 fern/pages/agents/memory_blocks.mdx delete mode 100644 
fern/pages/agents/message_types.mdx delete mode 100644 fern/pages/agents/overview.mdx delete mode 100644 fern/pages/agents/prebuilt_tools.mdx delete mode 100644 fern/pages/agents/run_code.mdx delete mode 100644 fern/pages/agents/shared-memory-blocks-guide.mdx delete mode 100644 fern/pages/agents/sleep_time_agents.mdx delete mode 100644 fern/pages/agents/tool_rules.mdx delete mode 100644 fern/pages/agents/web_search.mdx delete mode 100644 fern/pages/cookbooks_simple.mdx delete mode 100644 fern/pages/deployment/railway.mdx delete mode 100644 fern/pages/deployment/telemetry.mdx delete mode 100644 fern/pages/evals/advanced/custom-graders.mdx delete mode 100644 fern/pages/evals/advanced/multi-turn-conversations.mdx delete mode 100644 fern/pages/evals/cli/commands.mdx delete mode 100644 fern/pages/evals/concepts/datasets.mdx delete mode 100644 fern/pages/evals/concepts/extractors.mdx delete mode 100644 fern/pages/evals/concepts/gates.mdx delete mode 100644 fern/pages/evals/concepts/graders.mdx delete mode 100644 fern/pages/evals/concepts/overview.mdx delete mode 100644 fern/pages/evals/concepts/suites.mdx delete mode 100644 fern/pages/evals/concepts/targets.mdx delete mode 100644 fern/pages/evals/configuration/suite-yaml.mdx delete mode 100644 fern/pages/evals/extractors/builtin.mdx delete mode 100644 fern/pages/evals/extractors/custom.mdx delete mode 100644 fern/pages/evals/getting-started.mdx delete mode 100644 fern/pages/evals/graders/multi-metric.mdx delete mode 100644 fern/pages/evals/graders/rubric-graders.mdx delete mode 100644 fern/pages/evals/graders/tool-graders.mdx delete mode 100644 fern/pages/evals/overview.mdx delete mode 100644 fern/pages/evals/results/overview.mdx delete mode 100644 fern/pages/evals/troubleshooting.mdx delete mode 100644 fern/pages/getting-started/prompts.mdx delete mode 100644 fern/pages/getting-started/quickstart.mdx delete mode 100644 fern/pages/legacy/architectures_overview.mdx delete mode 100644 
fern/pages/legacy/heartbeats_legacy.mdx delete mode 100644 fern/pages/legacy/low_latency_agents_legacy.mdx delete mode 100644 fern/pages/legacy/memgpt_agents_legacy.mdx delete mode 100644 fern/pages/legacy/migration_guide.mdx delete mode 100644 fern/pages/legacy/naming_history.mdx delete mode 100644 fern/pages/legacy/react_agents_legacy.mdx delete mode 100644 fern/pages/legacy/workflows_legacy.mdx delete mode 100644 fern/pages/selfhosting/overview.mdx delete mode 100644 fern/pages/tutorials/attaching_detaching_blocks.mdx delete mode 100644 fern/pages/tutorials/hello_world.mdx delete mode 100644 fern/pages/tutorials/pdf_chat.mdx delete mode 100644 fern/pages/tutorials/shared-memory-blocks.mdx delete mode 100644 fern/pages/voice/voice.mdx delete mode 100644 fern/pages/voice/voice_livekit.mdx delete mode 100644 fern/pages/voice/voice_vapi.mdx delete mode 100644 fern/scripts/prepare-openapi.ts diff --git a/fern/assets/leaderboard.js b/fern/assets/leaderboard.js deleted file mode 100644 index d888a8f0..00000000 --- a/fern/assets/leaderboard.js +++ /dev/null @@ -1,152 +0,0 @@ -/* ────────────────────────────────────────────────────────── - assets/leaderboard.js - Load via docs.yml → js: - path: assets/leaderboard.js - (strategy: lazyOnload is fine) - ────────────────────────────────────────────────────────── */ - -import yaml from 'https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/+esm'; - -console.log('🏁 leaderboard.js loaded on', location.pathname); - -const COST_CAP = 120; - -/* ---------- helpers ---------- */ -const pct = (v) => Number(v).toPrecision(3) + '%'; -const cost = (v) => '$' + Number(v).toFixed(2); -const ready = (cb) => - document.readyState === 'loading' - ? 
document.addEventListener('DOMContentLoaded', cb) - : cb(); - -/* ---------- main ---------- */ -ready(async () => { - // const host = document.getElementById('letta-leaderboard'); - // if (!host) { - // console.warn('LB-script: #letta-leaderboard not found - bailing out.'); - // return; - // } - /* ---- wait for the leaderboard container to appear (SPA nav safe) ---- */ - const host = await new Promise((resolve, reject) => { - const el = document.getElementById('letta-leaderboard'); - if (el) return resolve(el); // SSR / hard refresh path - - const obs = new MutationObserver(() => { - const found = document.getElementById('letta-leaderboard'); - if (found) { - obs.disconnect(); - resolve(found); // CSR navigation path - } - }); - obs.observe(document.body, { childList: true, subtree: true }); - - setTimeout(() => { - obs.disconnect(); - reject(new Error('#letta-leaderboard never appeared')); - }, 5000); // safety timeout - }).catch((err) => { - console.warn('LB-script:', err.message); - return null; - }); - if (!host) return; // still no luck → give up - - /* ----- figure out URL of data.yaml ----- */ - // const path = location.pathname.endsWith('/') - // ? 
location.pathname - // : location.pathname.replace(/[^/]*$/, ''); // strip file/slug - // const dataUrl = `${location.origin}${path}data.yaml`; - // const dataUrl = `${location.origin}/leaderboard/data.yaml`; // one-liner, always right - // const dataUrl = `${location.origin}/assets/leaderboard.yaml`; - // const dataUrl = `./assets/leaderboard.yaml`; // one-liner, always right - // const dataUrl = `${location.origin}/data.yaml`; // one-liner, always right - const dataUrl = 'https://raw.githubusercontent.com/letta-ai/letta-evals/refs/heads/main/letta-leaderboard/leaderboard_results.yaml'; - // const dataUrl = 'https://cdn.jsdelivr.net/gh/letta-ai/letta-evals@latest/letta-leaderboard/leaderboard_results.yaml'; - - console.log('LB-script: fetching', dataUrl); - - /* ----- fetch & parse YAML ----- */ - let rows; - try { - const resp = await fetch(dataUrl); - console.log(`LB-script: status ${resp.status}`); - if (!resp.ok) throw new Error(`HTTP ${resp.status}`); - rows = yaml.load(await resp.text()); - } catch (err) { - console.error('LB-script: failed to load YAML →', err); - return; - } - - /* ----- wire up table ----- */ - const dir = Object.create(null); - const tbody = document.getElementById('lb-body'); - const searchI = document.getElementById('lb-search'); - const headers = document.querySelectorAll('#lb-table thead th[data-key]'); - searchI.value = ''; // clear any persisted filter - - const render = () => { - const q = searchI.value.toLowerCase(); - tbody.innerHTML = rows - .map((r) => { - const over = r.total_cost > COST_CAP; - const barW = over ? '100%' : (r.total_cost / COST_CAP) * 100 + '%'; - const costCls = over ? 'cost-high' : 'cost-ok'; - const warnIcon = over - ? `` - : ''; - - return ` - - ${r.model} - - -
- ${pct(r.average)} - - - -
- ${cost(r.total_cost)} - ${warnIcon} - - `; - }) - .join(''); - }; - - const setIndicator = (activeKey) => { - headers.forEach((h) => { - h.classList.remove('asc', 'desc'); - if (h.dataset.key === activeKey) h.classList.add(dir[activeKey]); - }); - }; - - /* initial sort ↓ */ - dir.average = 'desc'; - rows.sort((a, b) => b.average - a.average); - setIndicator('average'); - render(); - - /* search */ - searchI.addEventListener('input', render); - - /* column sorting */ - headers.forEach((th) => { - const key = th.dataset.key; - th.addEventListener('click', () => { - const asc = dir[key] === 'desc'; - dir[key] = asc ? 'asc' : 'desc'; - - rows.sort((a, b) => { - const va = a[key], - vb = b[key]; - const cmp = - typeof va === 'number' - ? va - vb - : String(va).localeCompare(String(vb)); - return asc ? cmp : -cmp; - }); - - setIndicator(key); - render(); - }); - }); -}); diff --git a/fern/pages/ade-guide/simulator.mdx b/fern/pages/ade-guide/simulator.mdx deleted file mode 100644 index 5d01938b..00000000 --- a/fern/pages/ade-guide/simulator.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Agent Simulator -subtitle: Use the agent simulator to chat with your agent -slug: guides/ade/simulator ---- - -The Agent Simulator is the central interface where you interact with your agent in real-time. It provides a comprehensive view of your agent's conversation history and tool usage while offering an intuitive chat interface. - - - - -## Key Features - -### Conversation Visualization - -The simulator displays the complete event and conversation (or event) history of your agent, organized chronologically. Each message is color-coded and formatted according to its type for clear differentiation: - -- **User Messages**: Messages sent by you (the user) to the agent. These appear on the right side of the conversation view. -- **Agent Messages**: Responses generated by the agent and directed to the user. These appear on the left side of the conversation view. 
-- **System Messages**: Non-user messages that represent events or notifications, such as `[Alert] The user just logged on` or `[Notification] File upload completed`. These provide context about events happening in the environment. -- **Function (Tool) Messages** : Detailed records of tool executions, including: - - Tool calls made by the agent - - Arguments passed to the tools - - Results returned by the tools - - Any errors encountered during execution - -If an error occurs during tool execution, the agent is given an opportunity to handle the error and continue execution by calling the tool again. -The simulator supports real-time streaming of agent responses, allowing you to see the agent's thought process as it happens. - - -### Advanced Conversation Controls - -Beyond basic chatting, the simulator provides several controls to enhance your interaction: - -- **Message Type Selection**: Toggle between sending user messages or system messages -- **Conversation History**: Scroll through the entire conversation history -- **Message Search**: Quickly find specific messages or tool calls -- **Tool Execution View**: Expand tool calls to see detailed execution information -- **Token Usage**: Monitor token consumption throughout the conversation - -## Using the Simulator Effectively - -### Testing Agent Behavior - -The simulator is ideal for testing how your agent responds to different inputs: - -- Try various user queries to test the agent's understanding -- Send edge case questions to verify error handling -- Use system messages to simulate events and observe reactions - -### Debugging Tool Usage - -When developing custom tools, the simulator provides valuable insights: - -- See exactly which tools the agent chooses to use -- Verify that arguments are correctly formatted -- Check tool execution results and error handling -- Monitor the agent's interpretation of tool results - -### Simulating Multi-turn Conversations - -To test your agent's memory and conversation 
abilities: - -1. Start with a simple query to establish context -2. Follow up with related questions to test if the agent maintains context -3. Introduce new topics to see how the agent handles context switching -4. Return to previous topics to verify if information was retained - -### Best Practices - -- **Start with simple queries**: Begin testing with straightforward questions before moving to complex scenarios -- **Monitor tool usage**: Pay attention to which tools the agent chooses and why -- **Test edge cases**: Deliberately test how your agent handles unexpected inputs -- **Use system messages**: Simulate environmental events to test agent adaptability -- **Review context window**: Cross-reference with the Context Window Viewer to understand what information the agent is using to form responses diff --git a/fern/pages/advanced/custom_memory.mdx b/fern/pages/advanced/custom_memory.mdx deleted file mode 100644 index bf040776..00000000 --- a/fern/pages/advanced/custom_memory.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Creating custom memory classes -subtitle: Learn how to create custom memory classes -slug: guides/agents/custom-memory ---- - - -## Customizing in-context memory management - -We can extend both the `BaseMemory` and `ChatMemory` classes to implement custom in-context memory management for agents. -For example, you can add an additional memory section to "human" and "persona" such as "organization". - -In this example, we'll show how to implement in-context memory management that treats memory as a task queue. -We'll call this `TaskMemory` and extend the `ChatMemory` class so that we have both the original `ChatMemory` tools (`core_memory_replace` & `core_memory_append`) as well as the "human" and "persona" fields. 
- -We show an implementation of `TaskMemory` below: -```python -from letta.memory import ChatMemory, MemoryModule -from typing import Optional, List - -class TaskMemory(ChatMemory): - - def __init__(self, human: str, persona: str, tasks: List[str]): - super().__init__(human=human, persona=persona) - self.memory["tasks"] = MemoryModule(limit=2000, value=tasks) # create an empty list - - - - def task_queue_push(self, task_description: str) -> Optional[str]: - """ - Push to a task queue stored in core memory. - - Args: - task_description (str): A description of the next task you must accomplish. - - Returns: - Optional[str]: None is always returned as this function does not produce a response. - """ - self.memory["tasks"].value.append(task_description) - return None - - def task_queue_pop(self) -> Optional[str]: - """ - Get the next task from the task queue - - Returns: - Optional[str]: The description of the task popped from the queue, - if there are still tasks in queue. Otherwise, returns None (the - task queue is empty) - """ - if len(self.memory["tasks"].value) == 0: - return None - task = self.memory["tasks"].value[0] - self.memory["tasks"].value = self.memory["tasks"].value[1:] - return task -``` - -To create an agent with this custom memory type, we can simply pass in an instance of `TaskMemory` into the agent creation. -We also will modify the persona of the agent to explain how the "tasks" section of memory should be used: -```python -task_agent_state = client.create_agent( - name="task_agent", - memory=TaskMemory( - human="My name is Sarah", - persona="You have an additional section of core memory called `tasks`. " \ - + "This section of memory contains of list of tasks you must do." \ - + "Use the `task_queue_push` tool to write down tasks so you don't forget to do them." \ - + "If there are tasks in the task queue, you should call `task_queue_pop` to retrieve and remove them. 
" \ - + "Keep calling `task_queue_pop` until there are no more tasks in the queue. " \ - + "Do *not* respond to the user until you have completed all tasks in your queue. " \ - + "If you call `task_queue_pop`, you must always do what the popped task specifies", - tasks=["start calling yourself Bob", "tell me a haiku with my name"], - ) -) -``` diff --git a/fern/pages/agents/archival_best_practices.mdx b/fern/pages/agents/archival_best_practices.mdx deleted file mode 100644 index 640687a4..00000000 --- a/fern/pages/agents/archival_best_practices.mdx +++ /dev/null @@ -1,383 +0,0 @@ ---- -title: Best Practices -subtitle: Patterns, pitfalls, and advanced usage -slug: guides/agents/archival-best-practices ---- - -## Agent best practices - -These patterns help agents use archival memory effectively during conversations. - -### 1. Avoid over-insertion - -The most common pitfall is inserting too many memories, creating clutter. Trust the agent to decide what's worth storing long-term. - -### 2. Use tags consistently - -Establish a tag taxonomy and stick to it. Good language models typically handle tagging well. - -### 3. Add context to insertions - -❌ Don't: "Likes replicants" -✅ Do: "Deckard shows unusual empathy toward replicants, particularly Rachael, suggesting possible replicant identity" - -### 4. Let agents experiment - -Agents can test different query styles to understand what works: - -```python -# What the agent does (agent tool call) -archival_memory_search(query="How does the Voight-Kampff test work?") -archival_memory_search(query="Voight-Kampff procedure") -archival_memory_search(query="replicant detection method") -``` - -**Important:** Have the agent persist learnings from experimentation in a memory block (like `archival_tracking` or `archival_policies`), not in archival itself (avoid meta-clutter). - -## Developer best practices (SDK) - -These patterns help developers configure and manage archival memory via the SDK. 
- -### Backfilling archives - -Developers can pre-load archival memory with existing knowledge via the SDK: - - -```typescript TypeScript -// Load company policies -const policies = [ - "All replicants must undergo Voight-Kampff testing upon arrival", - "Blade Runner units are authorized to retire rogue replicants", - "Tyrell Corporation employees must report suspected replicants immediately" -]; - -for (const policy of policies) { - await client.agents.passages.insert(agent.id, { - content: policy, - tags: ["policy", "company", "protocol"] - }); -} - -// Load technical documentation -const docs = [ - { - content: "Nexus-6 replicants: Superior strength, agility, and intelligence. Four-year lifespan prevents emotional development.", - tags: ["technical", "nexus-6", "specifications"] - }, - { - content: "Voight-Kampff test: Measures capillary dilation, blush response, and pupil dilation to detect replicants.", - tags: ["technical", "testing", "voight-kampff"] - } -]; - -for (const doc of docs) { - await client.agents.passages.insert(agent.id, { - content: doc.content, - tags: doc.tags - }); -} -``` -```python Python -# Load company policies -policies = [ - "All replicants must undergo Voight-Kampff testing upon arrival", - "Blade Runner units are authorized to retire rogue replicants", - "Tyrell Corporation employees must report suspected replicants immediately" -] - -for policy in policies: - client.agents.passages.insert( - agent_id=agent.id, - content=policy, - tags=["policy", "company", "protocol"] - ) - -# Load technical documentation -docs = [ - { - "content": "Nexus-6 replicants: Superior strength, agility, and intelligence. 
Four-year lifespan prevents emotional development.", - "tags": ["technical", "nexus-6", "specifications"] - }, - { - "content": "Voight-Kampff test: Measures capillary dilation, blush response, and pupil dilation to detect replicants.", - "tags": ["technical", "testing", "voight-kampff"] - } -] - -for doc in docs: - client.agents.passages.insert( - agent_id=agent.id, - content=doc["content"], - tags=doc["tags"] - ) -``` - - -**Use cases for backfilling:** -- Migrating knowledge bases to Letta -- Seeding specialized agents with domain knowledge -- Loading historical conversation logs -- Importing research libraries - -### Create an archival policies block - -Help your agent learn how to use archival memory effectively by creating a dedicated memory block for archival usage policies: - - -```typescript TypeScript -await client.blocks.create({ - label: "archival_policies", - value: ` - When to insert into archival: - - User preferences and important facts about the user - - Technical specifications and reference information - - Significant decisions or outcomes from conversations - - When NOT to insert: - - Temporary conversational context - - Information already stored - - Trivial details or pleasantries - - Search strategies: - - Use natural language questions for best results - - Include tags when filtering by category - - Try semantic variations if first search doesn't find what you need - ` -}); -``` -```python Python -client.blocks.create( - label="archival_policies", - value=""" - When to insert into archival: - - User preferences and important facts about the user - - Technical specifications and reference information - - Significant decisions or outcomes from conversations - - When NOT to insert: - - Temporary conversational context - - Information already stored - - Trivial details or pleasantries - - Search strategies: - - Use natural language questions for best results - - Include tags when filtering by category - - Try semantic variations if first search 
doesn't find what you need - """ -) -``` - - -You can improve this block through conversation with your agent: - -> **You:** "I noticed you didn't store the fact that I prefer TypeScript for backend development. Update your archival policies block to ensure you capture language preferences in the future." - -> **Agent:** Updates the archival_policies block to include "Programming language preferences" under "When to insert into archival" - -This collaborative approach helps agents learn from mistakes and improve their archival memory usage over time. - -### Track query effectiveness - -Build self-improving agents by having them track archival search effectiveness in a memory block: - - -```typescript TypeScript -// Create a memory block for tracking -await client.blocks.create({ - label: "archival_tracking", - value: ` - Query patterns: Natural language questions work best - Recent searches: "test procedures" (3 results), "replicant specs" (5 results) - Success rate: ~85% of searches return relevant results - Frequently searched topics: [technical specifications, protocols, case histories] - Common patterns: Queries about technical specs work better than vague questions - Improvements needed: Add more tags for better filtering - ` -}); -``` -```python Python -# Create a memory block for tracking -client.blocks.create( - label="archival_tracking", - value=""" - Query patterns: Natural language questions work best - Recent searches: "test procedures" (3 results), "replicant specs" (5 results) - Success rate: ~85% of searches return relevant results - Frequently searched topics: [technical specifications, protocols, case histories] - Common patterns: Queries about technical specs work better than vague questions - Improvements needed: Add more tags for better filtering - """ -) -``` - - -The agent can update this block based on search results and continuously refine its archival strategy. 
- -### Enforcing archival usage with tool rules - -If your agent forgets to use archival memory, you should first try prompting the agent to use it more consistently. If prompting alone doesn't work, you can enforce archival usage with [tool rules](/guides/agents/tool-rules). - -**Force archival search at turn start:** - - -```typescript TypeScript -await client.agents.update(agent.id, { - toolRules: [ - { type: "init", toolName: "archival_memory_search" } - ] -}); -``` -```python Python -from letta_client.types import InitToolRule - -client.agents.update( - agent_id=agent.id, - tool_rules=[ - InitToolRule(tool_name="archival_memory_search") - ] -) -``` - - - - -**Using the ADE:** Tool rules can also be configured in the Agent Development Environment's Tool Manager interface. - - - -**Note:** Anthropic models don't support strict structured output, so tool rules may not be enforced. Use OpenAI or Gemini models for guaranteed tool rule compliance. - - -**When to use tool rules:** -- Knowledge management agents that should always search context -- Agents that need to learn from every interaction -- Librarian/archivist agents focused on information storage - -**Latency considerations:** Forcing archival search adds a tool call at the start of every turn. For latency-sensitive applications (like customer support), consider making archival search optional. 
- -[Learn more about tool rules →](/guides/agents/tool-rules) - -### Modifying archival memories - -While agents cannot modify archival memories, developers can update or delete them via the SDK: - - -```typescript TypeScript -// Update a memory -await client.agents.passages.update(agent.id, passage.id, { - content: "Updated content", - tags: ["new", "tags"] -}); - -// Delete a memory -await client.agents.passages.delete(agent.id, passage.id); -``` -```python Python -# Update a memory -client.agents.passages.update( - agent_id=agent.id, - passage_id=passage.id, - content="Updated content", - tags=["new", "tags"] -) - -# Delete a memory -client.agents.passages.delete( - agent_id=agent.id, - passage_id=passage.id -) -``` - - -This allows you to: -- Fix incorrect information -- Update outdated facts -- Remove sensitive or irrelevant data -- Reorganize tag structures - -## Troubleshooting - -### Why can't my agent delete or modify archival memories? - -Archival memory is designed to be **agent-immutable** by default. Agents can only insert and search, not modify or delete. This is intentional to prevent agents from "forgetting" important information. - -**Solution:** If you need to modify or delete archival memories, use the SDK via `client.agents.passages.update()` or `client.agents.passages.delete()`. - -### When should I use the SDK vs letting the agent handle archival? - -**Let the agent handle it when:** -- The agent needs to decide what's worth remembering during conversations -- You want the agent to curate its own knowledge base -- Information emerges naturally from user interactions - -**Use the SDK when:** -- Pre-loading knowledge before the agent starts (backfilling) -- Cleaning up incorrect or outdated information -- Bulk operations (importing documentation, migrating data) -- Managing memories outside of agent conversations - -### My agent isn't using archival memory - -**Common causes:** -1. 
**Agent doesn't know to use it** - Add guidance to the agent's system prompt or create an `archival_policies` memory block -2. **Agent doesn't need it yet** - With small amounts of information, agents may rely on conversation history instead -3. **Model limitations** - Some models are better at tool use than others - -**Solutions:** -- Add explicit instructions in the agent's prompt about when to use archival -- Use tool rules to enforce archival usage (see "Enforcing archival usage with tool rules" above) -- Try a different model (OpenAI and Gemini models handle tool use well) - -### Search returns no results or wrong results - -**Common causes:** -1. **Empty archive** - Agent or developer hasn't inserted any memories yet -2. **Query mismatch** - Query doesn't semantically match stored content -3. **Tag filters too restrictive** - Filtering by tags that don't exist or are too narrow - -**Solutions:** -- Verify memories exist using `client.agents.passages.list()` (uses cursor-based pagination with `after`, `before`, and `limit` parameters) -- Try broader or rephrased queries -- Check tags by listing passages to see what's actually stored -- Remove tag filters temporarily to see if that's the issue - -### Agent inserting too many memories - -**Common causes:** -1. **No guidance** - Agent doesn't know when to insert vs when not to -2. **Tool rules forcing insertion** - Tool rules may require archival use -3. 
**Agent being overly cautious** - Some models default to storing everything - -**Solutions:** -- Create an `archival_policies` block with clear guidelines (see "Create an archival policies block" above) -- Review and adjust tool rules if you're using them -- Add explicit examples of what NOT to store in the agent's prompt - -## Next steps - - - - Learn how to search archival memory effectively - - - Back to archival memory overview - - - Learn about always-visible memory - - - Advanced tool execution constraints - - diff --git a/fern/pages/agents/archival_memory_overview.mdx b/fern/pages/agents/archival_memory_overview.mdx deleted file mode 100644 index 643b90e7..00000000 --- a/fern/pages/agents/archival_memory_overview.mdx +++ /dev/null @@ -1,196 +0,0 @@ ---- -title: Archival Memory -subtitle: Long-term semantic storage for agent knowledge -slug: guides/agents/archival-memory ---- - -## What is archival memory? - -Archival memory is a semantically searchable database where agents store facts, knowledge, and information for long-term retrieval. Unlike memory blocks that are always visible, archival memory is queried on-demand when relevant. - -**Key characteristics:** -- **Agent-immutable** - Agents cannot easily modify or delete archival memories (though developers can via SDK) -- **Unlimited storage** - No practical size limits -- **Semantic search** - Find information by meaning, not exact keywords -- **Tagged organization** - Agents can categorize memories with tags - -**Best for:** Event descriptions, reports, articles, historical records, and reference material that doesn't change frequently. 
- -## When to use archival memory - -**Use archival memory for:** -- Document repositories (API docs, technical guides, research papers) -- Conversation logs beyond the context window -- Customer interaction history and support tickets -- Reports, articles, and written content -- Code examples and technical references -- Training materials and educational content -- User research data and feedback -- Historical records and event logs - -**Don't use archival memory for:** -- Information that should always be visible → Use memory blocks -- Frequently changing state → Use memory blocks -- Current working memory → Use scratchpad blocks -- Information that needs frequent modification → Use memory blocks - -## How agents interact with archival memory - - -**Two ways to interact with archival memory:** - -**Agent tools** - What agents do autonomously during conversations: -- `archival_memory_insert` - Store new information -- `archival_memory_search` - Query for relevant memories - -**SDK endpoints** - What developers do via `client.agents.passages.*`: -- Insert, search, list, update, and delete memories programmatically -- Manage archival content outside of agent conversations - - -Agents have two primary tools for archival memory: `archival_memory_insert` and `archival_memory_search`. 
- -### Inserting information - -**Agents** can insert memories during conversations using the `archival_memory_insert` tool: - -```python -# What the agent does (agent tool call) -archival_memory_insert( - content="Deckard retired six replicants in the off-world colonies before returning to Los Angeles", - tags=["replicant", "history", "retirement"] -) -``` - -**Developers** can also insert programmatically via the SDK: - - -```typescript TypeScript -await client.agents.passages.insert(agent.id, { - content: "The Tyrell Corporation's motto: 'More human than human'", - tags: ["company", "motto", "tyrell"] -}); -``` -```python Python -client.agents.passages.insert( - agent_id=agent.id, - content="The Tyrell Corporation's motto: 'More human than human'", - tags=["company", "motto", "tyrell"] -) -``` - - -### Searching for information - -**Agents** can search semantically using the `archival_memory_search` tool: - -```python -# What the agent does (agent tool call) -results = archival_memory_search( - query="replicant lifespan", - tags=["technical"], # Optional: filter by tags - page=0 -) -``` - -**Developers** can also search programmatically via the SDK: - - -```typescript TypeScript -const results = await client.agents.passages.search(agent.id, { - query: "replicant lifespan", - tags: ["technical"], - page: 0 -}); -``` -```python Python -results = client.agents.passages.search( - agent_id=agent.id, - query="replicant lifespan", - tags=["technical"], - page=0 -) -``` - - -Results return **semantically relevant** information - meaning the search understands concepts and meaning, not just exact keywords. For example, searching for "artificial memories" will find "implanted memories" even though the exact words don't match. 
- -[Learn more about search and querying →](/guides/agents/archival-search) - -## Real-world examples - -### Example 1: Personal knowledge manager -An agent with 30k+ archival memories tracking: -- Personal preferences and history -- Technical learnings and insights -- Article summaries and research notes -- Conversation highlights - -### Example 2: Social media agent -An agent with 32k+ memories tracking interactions: -- User preferences and conversation history -- Common topics and interests -- Interaction patterns and communication styles -- Tags by user, topic, and interaction type - -### Example 3: Customer support agent -- Stores ticket resolutions and common issues -- Tags by product, issue type, priority -- Searches archival for similar past issues -- Learns from successful resolutions over time - -### Example 4: Research assistant -- Stores paper summaries with key findings -- Tags by topic, methodology, author -- Cross-references related research -- Builds a semantic knowledge graph - -## Archival memory vs conversation search - - -**Archival memory** is for **intentional** storage: -- Agents decide what's worth remembering long-term -- Used for facts, knowledge, and reference material -- Curated by the agent through active insertion - -**Conversation search** is for **historical** retrieval: -- Searches through actual past messages -- Used to recall what was said in previous conversations -- Automatic - no agent curation needed - -**Example:** -- User says: "I prefer Python for data science projects" -- **Archival:** Agent inserts "User prefers Python for data science" as a fact -- **Conversation search:** Agent can search for the original message later - -Use archival for structured knowledge, conversation search for historical context. 
- - -## Next steps - - - - Learn how to write effective queries and filter results - - - Patterns, pitfalls, and advanced usage - - - Learn about always-visible memory - - - Understand Letta's memory system - - diff --git a/fern/pages/agents/archival_search.mdx b/fern/pages/agents/archival_search.mdx deleted file mode 100644 index e7ad05d7..00000000 --- a/fern/pages/agents/archival_search.mdx +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: Searching & Querying -subtitle: How to search archival memory effectively -slug: guides/agents/archival-search ---- - -## Search result format - - -**What agents receive:** Each result contains: -- `content` - The stored text -- `tags` - Associated tags -- `timestamp` - When the memory was created -- `relevance` - Scoring with `rrf_score`, `vector_rank`, `fts_rank` - -Letta uses **hybrid search** combining semantic (vector) and keyword (full-text) search, ranked using Reciprocal Rank Fusion (RRF). Higher `rrf_score` means more relevant. - - -## Writing effective queries - -Letta uses OpenAI's `text-embedding-3-small` model, which handles natural language questions well. Agents can use various query styles: - -**Natural language questions work best:** - -```python -# What the agent does (agent tool call) -archival_memory_search(query="How does the test work?") -# Returns: "The Voight-Kampff test measures involuntary emotional responses..." -``` - -**Keywords also work:** - -```python -# What the agent does (agent tool call) -archival_memory_search(query="replicant lifespan") -# Returns memories containing both keywords and semantically related concepts -``` - -**Concept-based queries leverage semantic understanding:** - -```python -# What the agent does (agent tool call) -archival_memory_search(query="artificial memories") -# Returns: "...experimental replicant with implanted memories..." -# (semantic match despite different terminology) -``` - - -**Pagination:** Agents receive multiple results per search. 
If an agent doesn't paginate correctly, you can instruct it to adjust the `page` parameter or remind it to iterate through results. - - -## Filtering by time - -Agents can search by date ranges: - -```python -# What the agent does (agent tool call) - -# Recent memories -archival_memory_search( - query="test results", - start_datetime="2025-09-29T00:00:00" -) - -# Specific time window -archival_memory_search( - query="replicant cases", - start_datetime="2025-09-29T00:00:00", - end_datetime="2025-09-30T23:59:59" -) -``` - - -**Agent datetime awareness:** -- Agents know the current day but not the current time -- Agents can see timestamps of messages they've received -- Agents cannot control insertion timestamps (automatic) -- Developers can backdate memories via SDK with `created_at` -- Time filtering enables queries like "what did we discuss last week?" - - -## Tags and organization - -Tags help agents organize and filter archival memories. **Agents always know what tags exist in their archive** since tag lists are compiled into the context window. - -**Common tag patterns:** -- `user_info`, `professional`, `personal_history` -- `documentation`, `technical`, `reference` -- `conversation`, `milestone`, `event` -- `company_policy`, `procedure`, `guideline` - -**Tag search modes:** -- Match any tag -- Match all tags -- Filter by date ranges - -Example of organized tagging: - -```python -# What the agent does (agent tool call) - -# Atomic memory with precise tags -archival_memory_insert( - content="Nexus-6 replicants have a four-year lifespan", - tags=["technical", "replicant", "nexus-6"] -) - -# Later, easy retrieval -archival_memory_search( - query="how long do replicants live", - tags=["technical"] -) -``` - -## Performance and scale - - -Archival memory has no practical size limits and remains fast at scale: - -**Letta Cloud:** Uses [TurboPuffer](https://turbopuffer.com/) for extremely fast semantic search, even with hundreds of thousands of memories. 
- -**Self-hosted:** Uses pgvector (PostgreSQL) for vector search. Performance scales well with proper indexing. - -**Letta Desktop:** Uses SQLite with vector search extensions. Suitable for personal use cases. - -No matter the backend, archival memory scales to large archives without performance degradation. - - -## Embedding models and search quality - -Archival search quality depends on the agent's embedding model: - -**Letta Cloud:** All agents use `text-embedding-3-small`, which is optimized for most use cases. This model cannot be changed. - -**Self-hosted:** Embedding model is pinned to the agent at creation. The default `text-embedding-3-small` is sufficient for nearly all use cases. - -### Changing embedding models (self-hosted only) - -To change an agent's embedding model, you must: -1. List and export all archival memories -2. Delete all archival memories -3. Update the agent's embedding model -4. Re-insert all memories (they'll be re-embedded) - - -Changing embedding models is a destructive operation. Export your archival memories first. 
- - -## Programmatic access (SDK) - -Developers can manage archival memory programmatically via the SDK: - - -```typescript TypeScript -// Insert a memory -await client.agents.passages.insert(agent.id, { - content: "The Voight-Kampff test requires a minimum of 20 cross-referenced questions", - tags: ["technical", "testing", "protocol"] -}); - -// Search memories -const results = await client.agents.passages.search(agent.id, { - query: "testing procedures", - tags: ["protocol"], - page: 0 -}); - -// List all memories -const passages = await client.agents.passages.list(agent.id, { - limit: 100 -}); - -// Get a specific memory -const passage = await client.agents.passages.get(agent.id, passageId); -``` -```python Python -# Insert a memory -client.agents.passages.insert( - agent_id=agent.id, - content="The Voight-Kampff test requires a minimum of 20 cross-referenced questions", - tags=["technical", "testing", "protocol"] -) - -# Search memories -results = client.agents.passages.search( - agent_id=agent.id, - query="testing procedures", - tags=["protocol"], - page=0 -) - -# List all memories -passages = client.agents.passages.list( - agent_id=agent.id, - limit=100 -) - -# Get a specific memory -passage = client.agents.passages.get( - agent_id=agent.id, - passage_id=passage_id -) -``` - - -## Next steps - - - - Learn patterns, pitfalls, and advanced usage - - - Back to archival memory overview - - diff --git a/fern/pages/agents/base_tools.mdx b/fern/pages/agents/base_tools.mdx deleted file mode 100644 index 86eb84dc..00000000 --- a/fern/pages/agents/base_tools.mdx +++ /dev/null @@ -1,150 +0,0 @@ ---- -title: Base Tools -subtitle: Built-in tools for memory management and user communication -slug: guides/agents/base-tools ---- - -Base tools are built-in tools that enable memory management, user communication, and access to conversation history and archival storage. 
- -## Available Base Tools - -| Tool | Purpose | -|------|---------| -| `memory_insert` | Insert text into a memory block | -| `memory_replace` | Replace specific text in a memory block | -| `memory_rethink` | Completely rewrite a memory block | -| `memory_finish_edits` | Signal completion of memory editing | -| `conversation_search` | Search prior conversation history | -| `archival_memory_insert` | Add content to archival memory | -| `archival_memory_search` | Search archival memory | -| `send_message` | Send a message to the user (legacy architectures only) | - -## Memory Block Editing - -Memory blocks are editable sections in the agent's context window. These tools let agents update their own memory. - -See the [Memory Blocks guide](/guides/agents/memory-blocks) for more about how memory blocks work. - -### memory_insert - -Insert text at a specific line in a memory block. - -**Parameters:** -- `label`: Which memory block to edit -- `new_str`: Text to insert -- `insert_line`: Line number (0 for beginning, -1 for end) - -**Common uses:** -- Add new information to the end of a block -- Insert context at the beginning -- Add items to a list - -### memory_replace - -Replace specific text in a memory block. - -**Parameters:** -- `label`: Which memory block to edit -- `old_str`: Exact text to find and replace -- `new_str`: Replacement text - -**Common uses:** -- Update outdated information -- Fix typos or errors -- Delete text (by replacing with empty string) - -**Important:** The `old_str` must match exactly, including whitespace. If it appears multiple times, the tool will error. - -### memory_rethink - -Completely rewrite a memory block's contents. 
- -**Parameters:** -- `label`: Which memory block to rewrite -- `new_memory`: Complete new contents - -**When to use:** -- Condensing cluttered information -- Major reorganization -- Combining multiple pieces of information - -**When not to use:** -- Adding one line (use `memory_insert`) -- Changing specific text (use `memory_replace`) - -### memory_finish_edits - -Signals that memory editing is complete. - -**Parameters:** None - -Some agent architectures use this to mark the end of a memory update cycle. - -## Recall Memory - -### conversation_search - -Search prior conversation history using both text matching and semantic similarity. - -**Parameters:** -- `query`: What to search for -- `roles`: Optional filter by message role (user, assistant, tool) -- `limit`: Maximum number of results -- `start_date`, `end_date`: ISO 8601 date/datetime filters (inclusive) - -**Returns:** -Matching messages with role and content, ordered by relevance. - -**Example queries:** -- "What did the user say about deployment?" -- "Find previous responses about error handling" -- "Search tool outputs from last week" - -## Archival Memory - -Archival memory stores information long-term outside the context window. See the [Archival Memory documentation](/guides/agents/archival-memory-overview) for details. - -### archival_memory_insert - -Add content to archival memory for long-term storage. - -**Parameters:** -- `content`: Text to store -- `tags`: Optional tags for organization - -**Common uses:** -- Storing reference information for later -- Saving important context that doesn't fit in memory blocks -- Building a knowledge base over time - -### archival_memory_search - -Search archival memory using semantic (embedding-based) search. 
- -**Parameters:** -- `query`: What to search for semantically -- `tags`: Optional tag filters -- `tag_match_mode`: "any" or "all" for tag matching -- `top_k`: Maximum results -- `start_datetime`, `end_datetime`: ISO 8601 filters (inclusive) - -**Returns:** -Matching passages with timestamps and content, ordered by semantic similarity. - -## Deprecated Tools - -These tools are still available but deprecated: - -| Tool | Use Instead | -|------|-------------| -| `send_message` | Agent responses (no tool needed). See [legacy architectures](/guides/legacy/memgpt_agents_legacy) | -| `core_memory_append` | `memory_insert` with `insert_line=-1` | -| `core_memory_replace` | `memory_replace` | - -## Related Documentation - -- [Memory Blocks](/guides/agents/memory-blocks) -- [Archival Memory](/guides/agents/archival-memory-overview) -- [Utilities](/guides/agents/prebuilt-tools) -- [Multi-Agent Tools](/guides/agents/multiagent) -- [Custom Tools](/guides/agents/custom-tools) diff --git a/fern/pages/agents/fetch_webpage.mdx b/fern/pages/agents/fetch_webpage.mdx deleted file mode 100644 index 8c036c98..00000000 --- a/fern/pages/agents/fetch_webpage.mdx +++ /dev/null @@ -1,166 +0,0 @@ ---- -title: Fetch Webpage -subtitle: Convert webpages to readable text/markdown -slug: guides/agents/fetch-webpage ---- - -The `fetch_webpage` tool enables Letta agents to fetch and convert webpages into readable text or markdown format. Useful for reading documentation, articles, and web content. - - -On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments with an Exa API key, fetching is enhanced. Without a key, it falls back to open-source extraction tools. 
- - -## Quick Start - - -```python Python -from letta import Letta - -client = Letta(token="LETTA_API_KEY") - -agent = client.agents.create( - model="openai/gpt-4o", - tools=["fetch_webpage"], - memory_blocks=[{ - "label": "persona", - "value": "I can fetch and read webpages to answer questions about online content." - }] -) -``` - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -const agent = await client.agents.create({ - model: "openai/gpt-4o", - tools: ["fetch_webpage"], - memoryBlocks: [{ - label: "persona", - value: "I can fetch and read webpages to answer questions about online content." - }] -}); -``` - - -## Tool Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `url` | `str` | The URL of the webpage to fetch | - -## Return Format - -The tool returns webpage content as text/markdown. - -**With Exa API (if configured):** -```json -{ - "title": "Page title", - "published_date": "2025-01-15", - "author": "Author name", - "text": "Full page content in markdown" -} -``` - -**Fallback (without Exa):** -Returns markdown-formatted text extracted from the HTML. - -## How It Works - -The tool uses a multi-tier approach: - -1. **Exa API** (if `EXA_API_KEY` is configured): Uses Exa's content extraction -2. **Trafilatura** (fallback): Open-source text extraction to markdown -3. **Readability + html2text** (final fallback): HTML cleaning and conversion - -## Self-Hosted Setup - -For enhanced fetching on self-hosted servers, optionally configure an Exa API key. Without it, the tool still works using open-source extraction. 
- -### Optional: Configure Exa - - -```bash Docker -docker run \ - -e EXA_API_KEY="your_exa_api_key" \ - letta/letta:latest -``` - -```yaml Docker Compose -services: - letta: - environment: - - EXA_API_KEY=your_exa_api_key -``` - -```bash Server -export EXA_API_KEY="your_exa_api_key" -letta server -``` - -```python Per-Agent -agent = client.agents.create( - tools=["fetch_webpage"], - tool_env_vars={ - "EXA_API_KEY": "your_exa_api_key" - } -) -``` - - -## Common Patterns - -### Documentation Reader -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["fetch_webpage", "web_search"], - memory_blocks=[{ - "label": "persona", - "value": "I search for documentation with web_search and read it with fetch_webpage." - }] -) -``` - -### Research Assistant -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["fetch_webpage", "archival_memory_insert"], - memory_blocks=[{ - "label": "persona", - "value": "I fetch articles and store key insights in archival memory for later reference." - }] -) -``` - -### Content Summarizer -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["fetch_webpage"], - memory_blocks=[{ - "label": "persona", - "value": "I fetch webpages and provide summaries of their content." 
- }] -) -``` - -## When to Use - -| Use Case | Tool | Why | -|----------|------|-----| -| Read specific webpage | `fetch_webpage` | Direct URL access | -| Find webpages to read | `web_search` | Discovery first | -| Read + search in one | `web_search` with `include_text=true` | Combined operation | -| Multiple pages | `fetch_webpage` | Iterate over URLs | - -## Related Documentation - -- [Utilities Overview](/guides/agents/prebuilt-tools) -- [Web Search](/guides/agents/web-search) -- [Run Code](/guides/agents/run-code) -- [Custom Tools](/guides/agents/custom-tools) -- [Tool Variables](/guides/agents/tool-variables) diff --git a/fern/pages/agents/human_in_the_loop.mdx b/fern/pages/agents/human_in_the_loop.mdx deleted file mode 100644 index 9212769b..00000000 --- a/fern/pages/agents/human_in_the_loop.mdx +++ /dev/null @@ -1,690 +0,0 @@ ---- -title: Human-in-the-Loop -slug: guides/agents/human-in-the-loop -subtitle: How to integrate human-in-the-loop workflows for tool approval ---- - - -Human-in-the-Loop support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta). - - -Human-in-the-loop (HITL) workflows allow you to maintain control over critical agent actions by requiring human approval before executing certain tools. This is essential for operations that could have significant consequences, such as database modifications, financial transactions, or external API calls with cost implications. - -```mermaid -flowchart LR - Agent[Agent] -->|Calls Tool| Check{Requires
Approval?} - Check -->|No| Execute[Execute Tool] - Check -->|Yes| Request[Request Approval] - Request --> Human[Human Review] - Human -->|Approve| Execute - Human -->|Deny| Error[Return Error] - Execute --> Result[Return Result] - Error --> Agent - Result --> Agent -``` - -## Overview - -When a tool is marked as requiring approval, the agent will pause execution and wait for human approval or denial before proceeding. This creates a checkpoint in the agent's workflow where human judgment can be applied. The approval workflow is designed to be non-blocking and supports both synchronous and streaming message interfaces, making it suitable for interactive applications as well as batch processing systems. - -### Key Benefits - -- **Risk Mitigation**: Prevent unintended actions in production environments -- **Cost Control**: Review expensive operations before execution -- **Compliance**: Ensure human oversight for regulated operations -- **Quality Assurance**: Validate agent decisions before critical actions - -### How It Works - -The approval workflow follows a clear sequence of steps that ensures human oversight at critical decision points: - -1. **Tool Configuration**: Mark specific tools as requiring approval either globally (default for all agents) or per-agent -2. **Execution Pause**: When the agent attempts to call a protected tool, it immediately pauses and returns an approval request message -3. **Human Review**: The approval request includes the tool name, arguments, and context, allowing you to make an informed decision -4. **Approval/Denial**: Send an approval response to either execute the tool or provide feedback for the agent to adjust its approach -5. **Continuation**: The agent receives the tool result (on approval) or an error message (on denial) and continues processing - - -## Best Practices - -Following these best practices will help you implement effective human-in-the-loop workflows while maintaining a good user experience and system performance. 
- -### 1. Selective Tool Marking - -Not every tool needs human approval. Be strategic about which tools require oversight to avoid workflow bottlenecks while maintaining necessary controls: - -**Tools that typically require approval:** -- Database write operations (INSERT, UPDATE, DELETE) -- External API calls with financial implications -- File system modifications or deletions -- Communication tools (email, SMS, notifications) -- System configuration changes -- Third-party service integrations with rate limits - -### 2. Clear Denial Reasons - -When denying a request, your feedback directly influences how the agent adjusts its approach. Provide specific, actionable guidance rather than vague rejections: - -```python -# Good: Specific and actionable -"reason": "Use read-only query first to verify the data before deletion" - -# Bad: Too vague -"reason": "Don't do that" -``` - -The agent will use your denial reason to reformulate its approach, so the more specific you are, the better the agent can adapt. - -## Setting Up Approval Requirements - -There are two methods for configuring tool approval requirements, each suited for different use cases. Choose the approach that best fits your security model and operational needs. - -### Method 1: Create/Upsert Tool with Default Approval Requirement - -Set approval requirements at the tool level when creating or upserting a tool. This approach ensures consistent security policies across all agents that use the tool. The `default_requires_approval` flag will be applied to all future agent-tool attachments: - - -```curl curl maxLines=50 -curl --request POST \ - --url http://localhost:8283/v1/tools \ - --header 'Content-Type: application/json' \ - --data '{ - "name": "sensitive_operation", - "default_requires_approval": true, - "json_schema": { - "type": "function", - "function": { - "name": "sensitive_operation", - "parameters": {...} - } - }, - "source_code": "def sensitive_operation(...): ..." 
- }' - -# All agents using this tool will require approval -curl --request POST \ - --url http://localhost:8283/v1/agents \ - --header 'Content-Type: application/json' \ - --data '{ - "tools": ["sensitive_operation"], - // ... other configuration - }' -``` -```python python maxLines=50 -# Create a tool that requires approval by default -approval_tool = client.tools.upsert_from_function( - func=sensitive_operation, - default_requires_approval=True, -) - -# All agents using this tool will require approval -agent = client.agents.create( - tools=['sensitive_operation'], - # ... other configuration -) -``` -```typescript TypeScript maxLines=50 -// Create a tool that requires approval by default -const approvalTool = await client.tools.upsert({ - name: "sensitive_operation", - defaultRequiresApproval: true, - jsonSchema: { - type: "function", - function: { - name: "sensitive_operation", - parameters: {...} - } - }, - sourceCode: "def sensitive_operation(...): ..." -}); - -// All agents using this tool will require approval -const agent = await client.agents.create({ - tools: ["sensitive_operation"], - // ... other configuration -}); -``` - - -### Method 2: Modify Existing Tool with Default Approval Requirement - - -Modifying the tool-level setting will not retroactively apply to existing agent-tool attachments - it only sets the default for future attachments. This means that if the tool is already attached to an agent, the agent will continue using the tool without approval. To modify an existing agent-tool attachment, refer to Method 3 below. - - -For an already existing tool, you can modify the tool to set approval requirements on future agent-tool attachments. 
The `default_requires_approval` flag will be applied to all future agent-tool attachments: - -```curl curl maxLines=50 -curl --request PATCH \ -  --url http://localhost:8283/v1/tools/$TOOL_ID \ -  --header 'Content-Type: application/json' \ -  --data '{ -    "default_requires_approval": true -  }' - -# All agents using this tool will require approval -curl --request POST \ -  --url http://localhost:8283/v1/agents \ -  --header 'Content-Type: application/json' \ -  --data '{ -    "tools": ["sensitive_operation"], -    // ... other configuration -  }' -``` -```python python maxLines=50 -# Modify the tool to require approval by default -approval_tool = client.tools.modify( -  tool_id=sensitive_operation.id, -  default_requires_approval=True, -) - -# All agents using this tool will require approval -agent = client.agents.create( -  tools=['sensitive_operation'], -  # ... other configuration -) -``` -```typescript TypeScript maxLines=50 -// Modify the tool to require approval by default -const approvalTool = await client.tools.modify({ -  toolId: sensitive_operation.id, -  defaultRequiresApproval: true, -}); - -// All agents using this tool will require approval -const agent = await client.agents.create({ -  tools: ["sensitive_operation"], -  // ... other configuration -}); -``` - - -### Method 3: Per-Agent Tool Approval - -Configure approval requirements for specific agent-tool combinations, allowing fine-grained control over individual agent behaviors.
This method is particularly useful for: - -- **Trusted agents**: Remove approval requirements for well-tested, reliable agents -- **Progressive autonomy**: Gradually reduce approval requirements as agents prove reliable -- **Override defaults**: Change the approval setting for tools already attached to an agent - -Use the following endpoints to modify approval settings for existing agent-tool relationships: - - -```curl curl maxLines=50 -curl --request PATCH \ - --url http://localhost:8283/v1/agents/$AGENT_ID/tools/$TOOL_NAME/approval \ - --header 'Content-Type: application/json' \ - --data '{ - "requires_approval": true - }' -``` -```python python maxLines=50 -# Modify approval requirement for a specific agent -client.agents.tools.modify_approval( - agent_id=agent.id, - tool_name="database_write", - requires_approval=True, -) - -# Check current approval settings -tools = client.agents.tools.list(agent_id=agent.id) -for tool in tools: - print(f"{tool.name}: requires_approval={tool.requires_approval}") -``` -```typescript TypeScript maxLines=50 -// Modify approval requirement for a specific agent -await client.agents.tools.modifyApproval({ - agentId: agent.id, - toolName: "database_write", - requiresApproval: true, -}); - -// Check current approval settings -const tools = await client.agents.tools.list({ - agentId: agent.id, -}); -for (const tool of tools) { - console.log(`${tool.name}: requires_approval=${tool.requiresApproval}`); -} -``` - - -## Handling Approval Requests - -### Step 1: Agent Requests Approval - -When the agent attempts to call a tool that requires approval, execution immediately pauses. 
The agent returns a special approval request message containing: - -- **Tool name**: The specific tool being called -- **Arguments**: The exact parameters the agent intends to pass -- **Tool call ID**: A unique identifier for tracking this specific call -- **Message ID**: The approval request ID needed for your response -- **Stop reason**: Set to `"requires_approval"` to indicate the pause state - -This format matches the ToolCallMessage format intentionally, so that we can handle approval requests the same way we handle tool calls. Here's what an approval request looks like in practice: - - -```curl curl maxLines=50 -curl --request POST \ - --url http://localhost:8283/v1/agents/$AGENT_ID/messages \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [{ - "role": "user", - "content": "Delete all test data from the database" - }] - }' - -# Response includes approval request -{ - "messages": [ - { - "message_type": "reasoning_message", - "reasoning": "I need to delete test data from the database..." - }, - { - "message_type": "approval_request_message", - "id": "message-abc123", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data\"}", - "tool_call_id": "tool-xyz789" - } - } - ], - "stop_reason": "requires_approval" -} -``` -```python python maxLines=50 -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{ - "role": "user", - "content": "Delete all test data from the database" - }] -) - -# Response includes approval request -{ - "messages": [ - { - "message_type": "reasoning_message", - "reasoning": "I need to delete test data from the database..." 
- }, - { - "message_type": "approval_request_message", - "id": "message-abc123", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data\"}", - "tool_call_id": "tool-xyz789" - } - } - ], - "stop_reason": "requires_approval" -} -``` -```typescript TypeScript maxLines=50 -const response = await client.agents.messages.create({ - agentId: agent.id, - requestBody: { - messages: [{ - role: "user", - content: "Delete all test data from the database" - }] - } -}); - -// Response includes approval request -{ - "messages": [ - { - "message_type": "reasoning_message", - "reasoning": "I need to delete test data from the database..." - }, - { - "message_type": "approval_request_message", - "id": "message-abc123", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data\"}", - "tool_call_id": "tool-xyz789" - } - } - ], - "stop_reason": "requires_approval" -} -``` - - - - -### Step 2: Review and Respond - -Once you receive an approval request, you have two options: approve the tool execution or deny it with guidance. The agent will remain paused until it receives your response. - - While an approval is pending, the agent cannot process any other messages - you must resolve the approval request first. - -#### Approving the Request - -To approve a tool call, send an approval message with `approve: true` and the approval request ID. 
The agent will immediately execute the tool and continue processing: - - -```curl curl maxLines=50 -curl --request POST \ - --url http://localhost:8283/v1/agents/$AGENT_ID/messages \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [{ - "type": "approval", - "approvals": [{ - "approve": true, - "tool_call_id": "tool-xyz789" - }] - }] - }' - -# Response continues with tool execution -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "success", - "tool_return": "Deleted 1,234 test records" - }, - { - "message_type": "reasoning_message", - "reasoning": "I was able to delete the test data. Let me inform the user." - }, - { - "message_type": "assistant_message", - "content": "I've successfully deleted 1,234 test records from the database." - } - ], - "stop_reason": "end_turn" -} -``` -```python python maxLines=50 -# Approve the tool call -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{ - "type": "approval", - "approvals": [{ - "approve": True, - "tool_call_id": "tool-xyz789" - }] - }] -) - -# Response continues with tool execution -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "success", - "tool_return": "Deleted 1,234 test records" - }, - { - "message_type": "reasoning_message", - "reasoning": "I was able to delete the test data. Let me inform the user." - }, - { - "message_type": "assistant_message", - "content": "I've successfully deleted 1,234 test records from the database." 
- } - ], - "stop_reason": "end_turn" -} -``` -```typescript TypeScript maxLines=50 -// Approve the tool call -const response = await client.agents.messages.create({ - agentId: agent.id, - requestBody: { - messages: [{ - type: "approval", - approvals: [{ - approve: true, - tool_call_id: "tool-xyz789" - }] - }] - } -}); - -// Response continues with tool execution -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "success", - "tool_return": "Deleted 1,234 test records" - }, - { - "message_type": "reasoning_message", - "reasoning": "I was able to delete the test data. Let me inform the user." - }, - { - "message_type": "assistant_message", - "content": "I've successfully deleted 1,234 test records from the database." - } - ], - "stop_reason": "end_turn" -} -``` - - -#### Denying with Guidance - -When denying a tool call, you can provide a reason that helps the agent understand how to adjust its approach. The agent will receive an error response and can use your feedback to reformulate its strategy. This is particularly useful for guiding the agent toward safer or more appropriate actions: - - -```curl curl maxLines=50 -curl --request POST \ - --url http://localhost:8283/v1/agents/$AGENT_ID/messages \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [{ - "type": "approval", - "approvals": [{ - "approve": false, - "tool_call_id": "tool-xyz789", - "reason": "Only delete records older than 30 days, not all test data" - }] - }] - }' - -# Response shows agent adjusting based on feedback -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "error", - "tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data" - }, - { - "message_type": "reasoning_message", - "reasoning": "I need to modify my query to only delete old records..." 
- }, - { - "message_type": "tool_call_message", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}" - } - } - ], - "stop_reason": "requires_approval" -} -``` -```python python maxLines=50 -# Deny with explanation -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{ - "type": "approval", - "approvals": [{ - "approve": False, - "tool_call_id": "tool-xyz789", - "reason": "Only delete records older than 30 days, not all test data" - }] - }] -) - -# Response shows agent adjusting based on feedback -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "error", - "tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data" - }, - { - "message_type": "reasoning_message", - "reasoning": "I need to modify my query to only delete old records..." - }, - { - "message_type": "tool_call_message", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}" - } - } - ], - "stop_reason": "requires_approval" -} -``` -```typescript TypeScript maxLines=50 -// Deny with explanation -const response = await client.agents.messages.create({ - agentId: agent.id, - requestBody: { - messages: [{ - type: "approval", - approvals: [{ - approve: false, - tool_call_id: "tool-xyz789", - reason: "Only delete records older than 30 days, not all test data" - }] - }] - } -}); - -// Response shows agent adjusting based on feedback -{ - "messages": [ - { - "message_type": "tool_return_message", - "status": "error", - "tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data" - }, - { - "message_type": "reasoning_message", - "reasoning": "I need to modify my query to only delete old records..." 
- }, - { - "message_type": "tool_call_message", - "tool_call": { - "name": "database_write", - "arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}" - } - } - ], - "stop_reason": "requires_approval" -} -``` - - -### Streaming + Background Mode - -For streaming clients using background mode, approvals are best handled via `agents.messages.createStream(..., background: true)`. The approval response may include the `tool_return_message` on the approval stream itself, and follow‑up reasoning/assistant messages can be read by resuming that stream’s `run_id`. - - -Do not assume the `tool_return_message` will repeat after you resume. Treat the one on the approval stream as the source of truth, then resume to continue reading subsequent tokens. - - - -```curl curl maxLines=70 -# Approve in background after receiving approval_request_message -curl --request POST --url http://localhost:8283/v1/agents/$AGENT_ID/messages/stream --header 'Content-Type: application/json' --data '{ - "messages": [{"type": "approval", "approve": true, "approval_request_id": "message-abc"}], - "stream_tokens": true, - "background": true -}' - -# Example approval stream output (tool result arrives here): -data: {"run_id":"run-new","seq_id":0,"message_type":"tool_return_message","status":"success","tool_return":"..."} - -# Continue by resuming the approval stream's run -curl --request GET --url http://localhost:8283/v1/runs/$RUN_ID/stream --header 'Accept: text/event-stream' --data '{ - "starting_after": 0 -}' -``` -```python python maxLines=70 -# Receive an approval_request_message, then approve in background -approve = client.agents.messages.create_stream( - agent_id=agent.id, - messages=[{"type": "approval", "approvals": [{"approve": True, "tool_call_id": "tool-xyz789"}]}], - stream_tokens=True, - background=True, -) - -run_id = None -last_seq = 0 -for chunk in approve: - if hasattr(chunk, "run_id") and hasattr(chunk, "seq_id"): - run_id = chunk.run_id - 
last_seq = chunk.seq_id - if getattr(chunk, "message_type", None) == "tool_return_message": - # Tool result arrives here on the approval stream - break - -# Continue consuming output by resuming the background run -if run_id: - for chunk in client.runs.stream(run_id, starting_after=last_seq): - print(chunk) -``` -```typescript TypeScript maxLines=70 -// Receive an approval_request_message, then approve in background -const approve = await client.agents.messages.createStream({ - agentId: agent.id, - requestBody: { - messages: [{ type: "approval", approvals: [{ approve: true, tool_call_id: "tool-xyz789" }] }], - streamTokens: true, - background: true, - } -}); - -let runId: string | null = null; -let lastSeq = 0; -for await (const chunk of approve) { - if (chunk.run_id && chunk.seq_id) { runId = chunk.run_id; lastSeq = chunk.seq_id; } - if (chunk.message_type === "tool_return_message") { - // Tool result arrives here on the approval stream - break; - } -} - -// Continue consuming output by resuming the background run -if (runId) { - const resume = await client.runs.stream(runId, { startingAfter: lastSeq }); - for await (const chunk of resume) { - console.log(chunk); - } -} -``` - - - - - -**Run switching in background mode:** Approvals are separate background requests and create a new `run_id`. Save the approval stream cursor and resume that run. The original paused run will not deliver the tool result — do not wait for the tool return there. - - -See [background mode](/guides/agents/long-running) for resumption patterns. -### IDs and UI Triggers - -- **approval_request_id**: This field is now deprecated, but it is still used for backwards compatibility. Used `approval_request_message.id`. -- **tool_call_id**: Always send approvals/denials using the `tool_call_id` from the `ApprovalRequestMessage`. -- **UI trigger**: Open the approval UI on `approval_request_message` only; do not derive UI from `stop_reason`. 
diff --git a/fern/pages/agents/json_mode.mdx b/fern/pages/agents/json_mode.mdx deleted file mode 100644 index 78f05f7d..00000000 --- a/fern/pages/agents/json_mode.mdx +++ /dev/null @@ -1,468 +0,0 @@ ---- -title: JSON Mode & Structured Output -subtitle: Get structured JSON responses from your Letta agents -slug: guides/agents/json-mode ---- - -Letta provides two ways to get structured JSON output from agents: **Structured Generation through Tools** (recommended) and the `response_format` parameter. - -## Quick Comparison - - -**Recommended**: Use **Structured Generation through Tools** - works with all providers (Anthropic, OpenAI, Google, etc.) and integrates naturally with Letta's tool-calling architecture. - - - -**Structured Generation through Tools**: -- ✅ Universal provider compatibility -- ✅ Both reasoning AND structured output -- ✅ Per-message control -- ✅ Works even as "dummy tool" for pure formatting - - - -**`response_format` parameter**: -- ⚠️ OpenAI-compatible providers only (NOT Anthropic) -- ⚠️ Persistent agent state (affects all future responses) - -- ✅ Built-in provider schema enforcement - - -## Structured Generation through Tools (Recommended) - -Create a tool that defines your desired response format. The tool arguments become your structured data, and you can extract them from the tool call. - -### Creating a Structured Generation Tool - - -```typescript TypeScript maxLines=100 -import { LettaClient } from '@letta-ai/letta-client' - -// Create client (Letta Cloud) -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// Or for self-hosted -// const client = new LettaClient({ baseUrl: "http://localhost:8283" }); - -// First create the tool -const toolCode = `def generate_rank(rank: int, reason: str): - """Generate a ranking with explanation. - - Args: - rank (int): The numerical rank from 1-10. - reason (str): The reasoning behind the rank. 
- """ - print("Rank generated") - return`; - -const tool = await client.tools.create({ - sourceCode: toolCode, - sourceType: "python" -}); - -// Create agent with the structured generation tool -const agentState = await client.agents.create({ - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { - label: "human", - value: "The human's name is Chad. They are a food enthusiast who enjoys trying different cuisines." - }, - { - label: "persona", - value: "I am a helpful food critic assistant. I provide detailed rankings and reviews of different foods and restaurants." - } - ], - toolIds: [tool.id] -}); -``` - -```python title="python" maxLines=100 -from letta_client import Letta - -# Create client (Letta Cloud) -client = Letta(token="LETTA_API_KEY") - -# Or for self-hosted -# client = Letta(base_url="http://localhost:8283") - -def generate_rank(rank: int, reason: str): - """Generate a ranking with explanation. - - Args: - rank (int): The numerical rank from 1-10. - reason (str): The reasoning behind the rank. - """ - print("Rank generated") - return - -# Create the tool -tool = client.tools.create(func=generate_rank) - -# Create agent with the structured generation tool -agent_state = client.agents.create( - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Chad. They are a food enthusiast who enjoys trying different cuisines." - }, - { - "label": "persona", - "value": "I am a helpful food critic assistant. I provide detailed rankings and reviews of different foods and restaurants." - } - ], - tool_ids=[tool.id] -) -``` - - -### Using the Structured Generation Tool - - -```typescript TypeScript maxLines=100 -// Send message and instruct agent to use the tool -const response = await client.agents.messages.create( - agentState.id, { - messages: [ - { - role: "user", - content: "How do you rank sushi as a food? 
Please use the generate_rank tool to provide your response." - } - ] - } -); - -// Extract structured data from tool call -for (const message of response.messages) { - if (message.messageType === "tool_call_message") { - const args = JSON.parse(message.toolCall.arguments); - console.log(`Rank: ${args.rank}`); - console.log(`Reason: ${args.reason}`); - } -} - -// Example output: -// Rank: 8 -// Reason: Sushi is a highly regarded cuisine known for its fresh ingredients... -``` - -```python title="python" maxLines=100 -# Send message and instruct agent to use the tool -response = client.agents.messages.create( - agent_id=agent_state.id, - messages=[ - { - "role": "user", - "content": "How do you rank sushi as a food? Please use the generate_rank tool to provide your response." - } - ] -) - -# Extract structured data from tool call -for message in response.messages: - if message.message_type == "tool_call_message": - import json - args = json.loads(message.tool_call.arguments) - rank = args["rank"] - reason = args["reason"] - print(f"Rank: {rank}") - print(f"Reason: {reason}") - -# Example output: -# Rank: 8 -# Reason: Sushi is a highly regarded cuisine known for its fresh ingredients... -``` - - -The agent will call the tool, and you can extract the structured arguments: - -```json -{ - "rank": 8, - "reason": "Sushi is a highly regarded cuisine known for its fresh ingredients, artistic presentation, and cultural significance." -} -``` - -## Using `response_format` for Provider-Native JSON Mode - -The `response_format` parameter enables structured output/JSON mode from LLM providers that support it. This approach is fundamentally different from tools because **`response_format` becomes a persistent part of the agent's state** - once set, all future responses from that agent will follow the format until explicitly changed. 
- -Under the hood, `response_format` constrains the agent's assistant messages to follow the specified schema, but it doesn't affect tools - those continue to work normally with their original schemas. - - -**Requirements for `response_format`:** -- Only works with providers that support structured outputs (like OpenAI) - NOT Anthropic or other providers - - - -### Basic JSON Mode - - -```typescript TypeScript maxLines=100 -import { LettaClient } from '@letta-ai/letta-client' - -// Create client (Letta Cloud) -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// Create agent with basic JSON mode (OpenAI/compatible providers only) -const agentState = await client.agents.create({ - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { - label: "human", - value: "The human's name is Chad. They work as a data analyst and prefer clear, organized information." - }, - { - label: "persona", - value: "I am a helpful assistant who provides clear and well-organized responses." - } - ], - responseFormat: { type: "json_object" } -}); - -// Send message expecting JSON response -const response = await client.agents.messages.create( - agentState.id, { - messages: [ - { - role: "user", - content: "How do you rank sushi as a food? Please respond in JSON format with rank and reason fields." - } - ] - } -); - -for (const message of response.messages) { - console.log(message); -} -``` - -```python title="python" maxLines=100 -from letta_client import Letta - -# Create client (Letta Cloud) -client = Letta(token="LETTA_API_KEY") - -# Create agent with basic JSON mode (OpenAI/compatible providers only) -agent_state = client.agents.create( - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Chad. They work as a data analyst and prefer clear, organized information." 
- }, - { - "label": "persona", - "value": "I am a helpful assistant who provides clear and well-organized responses." - } - ], - response_format={"type": "json_object"} -) - -# Send message expecting JSON response -response = client.agents.messages.create( - agent_id=agent_state.id, - messages=[ - { - "role": "user", - "content": "How do you rank sushi as a food? Please respond in JSON format with rank and reason fields." - } - ] -) - -for message in response.messages: - print(message) -``` - - -### Advanced JSON Schema Mode - -For more precise control, you can use OpenAI's `json_schema` mode with strict validation: - - -```typescript TypeScript maxLines=100 -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// Define structured schema (from OpenAI structured outputs guide) -const responseFormat = { - type: "json_schema", - jsonSchema: { - name: "food_ranking", - schema: { - type: "object", - properties: { - rank: { - type: "integer", - minimum: 1, - maximum: 10 - }, - reason: { - type: "string" - }, - categories: { - type: "array", - items: { - type: "object", - properties: { - name: { type: "string" }, - score: { type: "integer" } - }, - required: ["name", "score"], - additionalProperties: false - } - } - }, - required: ["rank", "reason", "categories"], - additionalProperties: false - }, - strict: true - } -}; - -// Create agent -const agentState = await client.agents.create({ - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [] -}); - -// Update agent with response format -const updatedAgent = await client.agents.update( - agentState.id, - { responseFormat } -); - -// Send message -const response = await client.agents.messages.create( - agentState.id, { - messages: [ - { role: "user", content: "How do you rank sushi? Include categories for taste, presentation, and value." 
} - ] - } -); - -for (const message of response.messages) { - console.log(message); -} -``` - -```python title="python" maxLines=100 -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -# Define structured schema (from OpenAI structured outputs guide) -response_format = { - "type": "json_schema", - "json_schema": { - "name": "food_ranking", - "schema": { - "type": "object", - "properties": { - "rank": { - "type": "integer", - "minimum": 1, - "maximum": 10 - }, - "reason": { - "type": "string" - }, - "categories": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "score": { "type": "integer" } - }, - "required": ["name", "score"], - "additionalProperties": False - } - } - }, - "required": ["rank", "reason", "categories"], - "additionalProperties": False - }, - "strict": True - } -} - -# Create agent -agent_state = client.agents.create( - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[] -) - -# Update agent with response format -agent_state = client.agents.update( - agent_id=agent_state.id, - response_format=response_format -) - -# Send message -response = client.agents.messages.create( - agent_id=agent_state.id, - messages=[ - {"role": "user", "content": "How do you rank sushi? 
Include categories for taste, presentation, and value."} - ] -) - -for message in response.messages: - print(message) -``` - - -With structured JSON schema, the agent's response will be strictly validated: - -```json -{ - "rank": 8, - "reason": "Sushi is highly regarded for its fresh ingredients and artful presentation", - "categories": [ - {"name": "taste", "score": 9}, - {"name": "presentation", "score": 10}, - {"name": "value", "score": 6} - ] -} -``` - - -## Updating Agent Response Format - -You can update an existing agent's response format: - - -```typescript TypeScript maxLines=100 -// Update agent to use JSON mode (OpenAI/compatible only) -await client.agents.update(agentState.id, { - responseFormat: { type: "json_object" } -}); - -// Or remove JSON mode -await client.agents.update(agentState.id, { - responseFormat: null -}); -``` - -```python title="python" maxLines=100 -# Update agent to use JSON mode (OpenAI/compatible only) -client.agents.update( - agent_id=agent_state.id, - response_format={"type": "json_object"} -) - -# Or remove JSON mode -client.agents.update( - agent_id=agent_state.id, - response_format=None -) -``` - diff --git a/fern/pages/agents/memgpt_agents.mdx b/fern/pages/agents/memgpt_agents.mdx deleted file mode 100644 index 429f140b..00000000 --- a/fern/pages/agents/memgpt_agents.mdx +++ /dev/null @@ -1,217 +0,0 @@ ---- -title: Agent Memory & Architecture -subtitle: How Letta agents manage persistent, self-editing memory -slug: guides/agents/architectures/memgpt ---- - - -**Looking for legacy architecture documentation?** See [Legacy Architectures](/guides/legacy/memgpt_agents_legacy) for information on older agent types with send_message and heartbeats. - - - -Letta is made by the [creators of MemGPT](https://www.letta.com/about-us). The agent architecture in Letta is built on the MemGPT research paper's concepts of self-editing memory and memory hierarchy. 
- - -Letta agents solve the context window limitation of LLMs through context engineering across two tiers of memory: **in-context (core) memory** (including system instructions, read-write memory blocks, and conversation history), and **out-of-context memory** (older evicted conversation history and archival storage). - -To learn more about the research origins, read the [MemGPT research paper](https://arxiv.org/abs/2310.08560), or take the free [LLM OS course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/?utm_campaign=memgpt-launch&utm_content=331638345&utm_medium=social&utm_source=docs&hss_channel=tw-992153930095251456) on DeepLearning.ai. - -## Memory Hierarchy - -```mermaid -graph LR - subgraph CONTEXT[Context Window] - SYS[System Instructions] - CORE[Memory Blocks] - MSGS[Messages] - end - - RECALL[Recall Memory] - ARCH[Archival Memory] - - CONTEXT <--> RECALL - CONTEXT <--> ARCH -``` - -### In-context (core) memory - -Your agent's context window contains: -- **System instructions:** Your agent's base behavior and capabilities -- **Memory blocks:** Persistent, always-visible information (persona, user info, working state, etc.) -- **Recent messages:** Latest conversation history - -### Out-of-context memory - -When the context window fills up: -- **Recall memory:** Older messages searchable via `conversation_search` tool -- **Archival memory:** Long-term semantic storage searchable via `archival_memory_search` tool - -## Agent Architecture - -Letta's agent architecture follows modern LLM patterns: - -- **Native reasoning:** Uses model's built-in reasoning capabilities (Responses API for OpenAI, encrypted reasoning for other providers) -- **Direct messaging:** Agents respond with assistant messages -- **Compatibility:** Works with any LLM, tool calling not required -- **Self-directed termination:** Agents decide when to continue or stop - -This architecture is optimized for frontier models like GPT-5 and Claude Sonnet 4.5. 
- -[Learn more about the architecture evolution →](https://www.letta.com/blog/letta-v1-agent) - -## Memory Tools - -Letta agents have tools to manage their own memory: - -### Memory block editing -* `memory_insert` - Insert text into a memory block -* `memory_replace` - Replace specific text in a memory block -* `memory_rethink` - Completely rewrite a memory block - -### Recall memory -* `conversation_search` - Search prior conversation history - -### Archival memory -* `archival_memory_insert` - Store facts and knowledge long-term -* `archival_memory_search` - Query semantic storage - -[Learn more about memory tools →](/guides/agents/base-tools) - -## Creating Agents - -Agents are created with memory blocks that define their persistent context: - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -const agent = await client.agents.create({ - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { - label: "human", - value: "The human's name is Chad. They like vibe coding." - }, - { - label: "persona", - value: "My name is Sam, the all-knowing sentient AI." - } - ], - tools: ["web_search", "run_code"] -}); -``` - -```python Python -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -agent = client.agents.create( - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Chad. They like vibe coding." - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." 
- } - ], - tools=["web_search", "run_code"] -) -``` - -```bash cURL -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "openai/gpt-4o-mini", - "embedding": "openai/text-embedding-3-small", - "memory_blocks": [ - { - "label": "human", - "value": "The human'\''s name is Chad. They like vibe coding." - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." - } - ], - "tools": ["web_search", "run_code"] -}' -``` - - -## Context Window Management - -When the context window fills up, Letta automatically: -1. Compacts older messages into a recursive summary -2. Moves full message history to recall storage -3. Agent can search recall with `conversation_search` tool - -This happens transparently - your agent maintains continuity. - -## Populating Archival Memory - -Agents can insert memories during conversations, or you can populate archival memory programmatically: - - -```typescript TypeScript -// Insert a memory via SDK -await client.agents.passages.insert(agent.id, { - content: "The user prefers TypeScript over JavaScript for type safety.", - tags: ["preferences", "languages"] -}); - -// Agent can now search this -// Agent calls: archival_memory_search(query="language preferences") -``` -```python Python -# Insert a memory via SDK -client.agents.passages.insert( - agent_id=agent.id, - content="The user prefers TypeScript over JavaScript for type safety.", - tags=["preferences", "languages"] -) - -# Agent can now search this -# Agent calls: archival_memory_search(query="language preferences") -``` - - -[Learn more about archival memory →](/guides/agents/archival-memory) - -## Research Background - -Key concepts from the MemGPT research: - -- **Self-editing memory:** Agents actively manage their own memory -- **Memory hierarchy:** In-context vs out-of-context storage -- **Tool-based memory management:** Agents decide what to remember -- 
**Stateful agents:** Persistent memory across all interactions - -[Read the MemGPT paper →](https://arxiv.org/abs/2310.08560) -[Take the free course →](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory) - -## Next Steps - - - - Deep dive into memory block structure - - - Long-term semantic storage - - - Built-in tools for memory management - - - Optimizing agent memory usage - - diff --git a/fern/pages/agents/memory_blocks.mdx b/fern/pages/agents/memory_blocks.mdx deleted file mode 100644 index 8dfff8c2..00000000 --- a/fern/pages/agents/memory_blocks.mdx +++ /dev/null @@ -1,415 +0,0 @@ ---- -title: Memory Blocks -subtitle: Understanding the building blocks of agent memory -slug: guides/agents/memory-blocks ---- - - -Interested in learning more about the origin of memory blocks? Read our [blog post](https://www.letta.com/blog/memory-blocks). - - -## What are memory blocks? - -Memory blocks are structured sections of the agent's context window that persist across all interactions. They are always visible - no retrieval needed. - -**Memory blocks are Letta's core abstraction.** Create a block with a descriptive label and the agent learns how to use it. This simple mechanism enables capabilities impossible with traditional context management. 
- -**Key properties:** -- **Agent-managed** - Agents autonomously organize information based on block labels -- **Flexible** - Use for any purpose: knowledge, guidelines, state tracking, scratchpad space -- **Shareable** - Multiple agents can access the same block; update once, visible everywhere -- **Always visible** - Blocks stay in context, never need retrieval - -**Examples:** -- Store tool usage guidelines so agents avoid past mistakes -- Maintain working memory in a scratchpad block -- Mirror external state (user's current document) for real-time awareness -- Share read-only policies across all agents from a central source -- Coordinate multi-agent systems: parent agents watch subagent result blocks update in real-time -- Enable emergent behavior: add `performance_tracking` or `emotional_state` and watch agents start using them - -Memory blocks aren't just storage - they're a coordination primitive that enables sophisticated agent behavior. - -## Memory block structure - -Memory blocks represent a section of an agent's context window. An agent may have multiple memory blocks, or none at all. A memory block consists of: -* A `label`, which is a unique identifier for the block -* A `description`, which describes the purpose of the block -* A `value`, which is the contents/data of the block -* A `limit`, which is the size limit (in characters) of the block - -## The importance of the `description` field - -When making memory blocks, it's crucial to provide a good `description` field that accurately describes what the block should be used for. -The `description` is the main information used by the agent to determine how to read and write to that block. Without a good description, the agent may not understand how to use the block. - -Because `persona` and `human` are two popular block labels, Letta autogenerates default descriptions for these blocks if you don't provide them. 
If you provide a description for a memory block labelled `persona` or `human`, the default description will be overridden. - -For `persona`, a good default is: -> The persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions. - -For `human`, a good default is: -> The human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation. - -## Read-only blocks - -Memory blocks are read-write by default (so the agent can update the block using memory tools), but can be set to read-only by setting the `read_only` field to `true`. When a block is read-only, the agent cannot update the block. - -Read-only blocks are useful when you want to give an agent access to information (for example, a shared memory block about an organization), but you don't want the agent to be able to make potentially destructive changes to the block. - -## Creating an agent with memory blocks - -When you create an agent, you can specify memory blocks to also be created with the agent. For most chat applications, we recommend create a `human` block (to represent memories about the user) and a `persona` block (to represent the agent's persona). 
- -```typescript TypeScript maxLines=50 -// install letta-client with `npm install @letta-ai/letta-client` -import { LettaClient } from '@letta-ai/letta-client' - -// create a client to connect to your local Letta server -const client = new LettaClient({ - baseUrl: "http://localhost:8283" -}); - -// create an agent with two basic self-editing memory blocks -const agentState = await client.agents.create({ - memoryBlocks: [ - { - label: "human", - value: "The human's name is Bob the Builder.", - limit: 5000 - }, - { - label: "persona", - value: "My name is Sam, the all-knowing sentient AI.", - limit: 5000 - } - ], - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small" -}); -``` -```python title="python" maxLines=50 -# install letta_client with `pip install letta-client` -from letta_client import Letta - -# create a client to connect to your local Letta server -client = Letta( - base_url="http://localhost:8283" -) - -# create an agent with two basic self-editing memory blocks -agent_state = client.agents.create( - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Bob the Builder.", - "limit": 5000 - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI.", - "limit": 5000 - } - ], - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small" -) -``` - -When the agent is created, the corresponding blocks are also created and attached to the agent, so that the block value will be in the context window. - -## Creating and attaching memory blocks -You can also directly create blocks and attach them to an agent. This can be useful if you want to create blocks that are shared between multiple agents. If multiple agents are attached to a block, they will all have the block data in their context windows (essentially providing shared memory). - -Below is an example of creating a block directory, and attaching the block to two agents by specifying the `block_ids` field. 
- -```typescript TypeScript maxLines=50 -// create a persisted block, which can be attached to agents -const block = await client.blocks.create({ - label: "organization", - description: "A block to store information about the organization", - value: "Organization: Letta", - limit: 4000, -}); - -// create an agent with both a shared block and its own blocks -const sharedBlockAgent1 = await client.agents.create({ - name: "shared_block_agent1", - memoryBlocks: [ - { - label: "persona", - value: "I am agent 1" - }, - ], - blockIds: [block.id], - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small" - -}); - -// create another agent sharing the block -const sharedBlockAgent2 = await client.agents.create({ - name: "shared_block_agent2", - memoryBlocks: [ - { - label: "persona", - value: "I am agent 2" - }, - ], - blockIds: [block.id], - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small" -}); -``` -```python title="python" maxLines=50 -# create a persisted block, which can be attached to agents -block = client.blocks.create( - label="organization", - description="A block to store information about the organization", - value="Organization: Letta", - limit=4000, -) - -# create an agent with both a shared block and its own blocks -shared_block_agent1 = client.agents.create( - name="shared_block_agent1", - memory_blocks=[ - { - "label": "persona", - "value": "I am agent 1" - }, - ], - block_ids=[block.id], - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small" -) - -# create another agent sharing the block -shared_block_agent2 = client.agents.create( - name="shared_block_agent2", - memory_blocks=[ - { - "label": "persona", - "value": "I am agent 2" - }, - ], - block_ids=[block.id], - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small" -) -``` - -You can also attach blocks to existing agents: - -```typescript TypeScript -await client.agents.blocks.attach(agent.id, block.id); -``` -```python 
Python -client.agents.blocks.attach(agent_id=agent.id, block_id=block.id) -``` - -You can see all agents attached to a block by using the `block_id` field in the [blocks retrieve](/api-reference/blocks/retrieve) endpoint. - -## Managing blocks - -### Retrieving a block -You can retrieve the contents of a block by ID. This is useful when blocks store finalized reports, code outputs, or other data you want to extract for use outside the agent. - - -```typescript TypeScript -const block = await client.blocks.retrieve(block.id); -console.log(block.value); // access the block's content -``` -```python Python -block = client.blocks.retrieve(block.id) -print(block.value) # access the block's content -``` - - -### Listing blocks -You can list all blocks, optionally filtering by label or searching by label text. This is useful for finding blocks across your project. - - -```typescript TypeScript -// list all blocks -const blocks = await client.blocks.list(); - -// filter by label -const humanBlocks = await client.blocks.list({ - label: "human" -}); - -// search by label text -const searchResults = await client.blocks.list({ - labelSearch: "organization" -}); -``` -```python Python -# list all blocks -blocks = client.blocks.list() - -# filter by label -human_blocks = client.blocks.list(label="human") - -# search by label text -search_results = client.blocks.list(label_search="organization") -``` - - -### Modifying a block -You can directly modify a block's content, limit, description, or other properties. 
This is particularly useful for: -- External scripts that provide up-to-date information to agents (e.g., syncing a text file to a block) -- Updating shared blocks that multiple agents reference -- Programmatically managing block content outside of agent interactions - - -```typescript TypeScript -// update the block's value - completely replaces the content -await client.blocks.modify(block.id, { - value: "Updated organization information: Letta - Building agentic AI" -}); - -// update multiple properties -await client.blocks.modify(block.id, { - value: "New content", - limit: 6000, - description: "Updated description" -}); -``` -```python Python -# update the block's value - completely replaces the content -client.blocks.modify( - block.id, - value="Updated organization information: Letta - Building agentic AI" -) - -# update multiple properties -client.blocks.modify( - block.id, - value="New content", - limit=6000, - description="Updated description" -) -``` - - - -**Setting `value` completely replaces the entire block content** - it is not an append operation. If multiple processes (agents or external scripts) modify the same block concurrently, the last write wins and overwrites all earlier changes. To avoid data loss: -- Set blocks to **read-only** if you don't want agents to modify them -- Only modify blocks directly in controlled scenarios where overwriting is acceptable -- Ensure your application logic accounts for full replacements, not merges - - -### Deleting a block -You can delete a block when it's no longer needed. Note that deleting a block will remove it from all agents that have it attached. 
- - -```typescript TypeScript -await client.blocks.delete(block.id); -``` -```python Python -client.blocks.delete(block_id=block.id) -``` - - -### Inspecting block usage -See which agents have a block attached: - - -```typescript TypeScript -// list all agents that use this block -const agentsWithBlock = await client.blocks.agents.list(block.id); -console.log(`Used by ${agentsWithBlock.length} agents:`); -for (const agent of agentsWithBlock) { - console.log(` - ${agent.name}`); -} - -// with pagination -const agentsPage = await client.blocks.agents.list(block.id, { - limit: 10, - order: "asc" -}); -``` -```python Python -# list all agents that use this block -agents_with_block = client.blocks.agents.list(block_id=block.id) -print(f"Used by {len(agents_with_block)} agents:") -for agent in agents_with_block: - print(f" - {agent.name}") - -# with pagination -agents_page = client.blocks.agents.list( - block_id=block.id, - limit=10, - order="asc" -) -``` - - -## Agent-scoped block operations - -### Listing an agent's blocks -You can retrieve all blocks attached to a specific agent. This shows you the complete memory configuration for that agent. - - -```typescript TypeScript -const agentBlocks = await client.agents.blocks.list(agent.id); -``` -```python Python -agent_blocks = client.agents.blocks.list(agent_id=agent.id) -``` - - -### Retrieving an agent's block by label -Instead of using a block ID, you can retrieve a block from a specific agent using its label. This is useful when you want to inspect what the agent currently knows about a specific topic. 
- - -```typescript TypeScript -// get the agent's current knowledge about the human -const humanBlock = await client.agents.blocks.retrieve( - agent.id, - "human" -); -console.log(humanBlock.value); -``` -```python Python -# get the agent's current knowledge about the human -human_block = client.agents.blocks.retrieve( - agent_id=agent.id, - block_label="human" -) -print(human_block.value) -``` - - -### Modifying an agent's block -You can modify a block through the agent-scoped endpoint using the block's label. This is useful for updating agent-specific memory without needing to know the block ID. - - -```typescript TypeScript -// update the agent's human block -await client.agents.blocks.modify(agent.id, "human", { - value: "The human's name is Alice. She prefers Python over TypeScript." -}); -``` -```python Python -# update the agent's human block -client.agents.blocks.modify( - agent_id=agent.id, - block_label="human", - value="The human's name is Alice. She prefers Python over TypeScript." -) -``` - - -### Detaching blocks from agents -You can detach a block from an agent's context window. This removes the block from the agent's memory without deleting the block itself. - - -```typescript TypeScript -await client.agents.blocks.detach(agent.id, block.id); -``` -```python Python -client.agents.blocks.detach(agent_id=agent.id, block_id=block.id) -``` - diff --git a/fern/pages/agents/message_types.mdx b/fern/pages/agents/message_types.mdx deleted file mode 100644 index c57f2a33..00000000 --- a/fern/pages/agents/message_types.mdx +++ /dev/null @@ -1,459 +0,0 @@ ---- -title: Message Types -subtitle: Understanding message types and working with agent message history -slug: guides/agents/message-types ---- - -When you interact with a Letta agent and retrieve its message history using `client.agents.messages.list()`, you'll receive various types of messages that represent different aspects of the agent's execution. 
This guide explains all message types and how to work with them. - -## Overview - -Letta uses a structured message system where each message has a specific `message_type` field that indicates its purpose. Messages are returned as instances of `LettaMessageUnion`, which is a discriminated union of all possible message types. - -## Message Type Categories - -### User and System Messages - -#### `user_message` -Messages sent by the user or system events packaged as user input. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "user_message"; - content: string | Array; - name?: string; - otid?: string; - sender_id?: string; -} -``` - -**Special User Message Subtypes:** -User messages can contain JSON with a `type` field indicating special message subtypes: - -- **`login`** - User login events - ```json - { - "type": "login", - "last_login": "Never (first login)", - "time": "2025-10-03 12:34:56 PM PDT-0700" - } - ``` - -- **`user_message`** - Standard user messages - ```json - { - "type": "user_message", - "message": "Hello, agent!", - "time": "2025-10-03 12:34:56 PM PDT-0700" - } - ``` - -- **`system_alert`** - System notifications and alerts - ```json - { - "type": "system_alert", - "message": "System notification text", - "time": "2025-10-03 12:34:56 PM PDT-0700" - } - ``` - -#### `system_message` -Messages generated by the system, typically used for internal context. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "system_message"; - content: string; - name?: string; -} -``` - -**Note:** System messages are never streamed back in responses; they're only visible when paginating through message history. - -### Agent Reasoning and Responses - -#### `reasoning_message` -Represents the agent's internal reasoning or "chain of thought." 
- -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "reasoning_message"; - reasoning: string; - source: "reasoner_model" | "non_reasoner_model"; - signature?: string; -} -``` - -**Fields:** -- `reasoning` - The agent's internal thought process -- `source` - Whether this was generated by a model with native reasoning (like o1) or via prompting -- `signature` - Optional cryptographic signature for reasoning verification (for models that support it) - -#### `hidden_reasoning_message` -Represents reasoning that has been hidden from the response. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "hidden_reasoning_message"; - state: "redacted" | "omitted"; - hidden_reasoning?: string; -} -``` - -**Fields:** -- `state: "redacted"` - The provider redacted the reasoning content -- `state: "omitted"` - The API chose not to include reasoning (e.g., for o1/o3 models) - -#### `assistant_message` -The actual message content sent by the agent. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "assistant_message"; - content: string | Array; - name?: string; -} -``` - -### Tool Execution Messages - -#### `tool_call_message` -A request from the agent to execute a tool. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "tool_call_message"; - tool_call: { - name: string; - arguments: string; // JSON string - tool_call_id: string; - }; -} -``` - -**Example:** -```typescript -{ - message_type: "tool_call_message", - tool_call: { - name: "archival_memory_search", - arguments: '{"query": "user preferences", "page": 0}', - tool_call_id: "call_abc123" - } -} -``` - -#### `tool_return_message` -The result of a tool execution. 
- -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "tool_return_message"; - tool_return: string; - status: "success" | "error"; - tool_call_id: string; - stdout?: string[]; - stderr?: string[]; -} -``` - -**Fields:** -- `tool_return` - The formatted return value from the tool -- `status` - Whether the tool executed successfully -- `stdout`/`stderr` - Captured output from the tool execution (useful for debugging) - -### Human-in-the-Loop Messages - -#### `approval_request_message` -A request for human approval before executing a tool. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "approval_request_message"; - tool_call: { - name: string; - arguments: string; - tool_call_id: string; - }; -} -``` - -See [Human-in-the-Loop](/guides/agents/human_in_the_loop) for more information on this experimental feature. - -#### `approval_response_message` -The user's response to an approval request. - -**Structure:** -```typescript -{ - id: string; - date: datetime; - message_type: "approval_response_message"; - approve: boolean; - approval_request_id: string; - reason?: string; -} -``` - -## Working with Messages - -### Listing Messages - - -```typescript TypeScript -import { LettaClient } from "@letta-ai/letta-client"; - -const client = new LettaClient({ - baseUrl: "https://api.letta.com", -}); - -// List recent messages -const messages = await client.agents.messages.list("agent-id", { - limit: 50, - useAssistantMessage: true, -}); - -// Iterate through message types -for (const message of messages) { - switch (message.messageType) { - case "user_message": - console.log("User:", message.content); - break; - case "assistant_message": - console.log("Agent:", message.content); - break; - case "reasoning_message": - console.log("Reasoning:", message.reasoning); - break; - case "tool_call_message": - console.log("Tool call:", message.toolCall.name); - break; - // ... 
handle other types - } -} -``` - -```python Python -from letta_client import Letta - -client = Letta(base_url="https://api.letta.com") - -# List recent messages -messages = client.agents.messages.list( - agent_id="agent-id", - limit=50, - use_assistant_message=True -) - -# Iterate through message types -for message in messages: - if message.message_type == "user_message": - print(f"User: {message.content}") - elif message.message_type == "assistant_message": - print(f"Agent: {message.content}") - elif message.message_type == "reasoning_message": - print(f"Reasoning: {message.reasoning}") - elif message.message_type == "tool_call_message": - print(f"Tool call: {message.tool_call.name}") - # ... handle other types -``` - - -### Filtering Messages by Type - - -```typescript TypeScript -// Get only assistant messages (what the agent said to the user) -const agentMessages = messages.filter( - (msg) => msg.messageType === "assistant_message" -); - -// Get all tool-related messages -const toolMessages = messages.filter( - (msg) => msg.messageType === "tool_call_message" || - msg.messageType === "tool_return_message" -); - -// Get conversation history (user + assistant messages only) -const conversation = messages.filter( - (msg) => msg.messageType === "user_message" || - msg.messageType === "assistant_message" -); -``` - -```python Python -# Get only assistant messages (what the agent said to the user) -agent_messages = [ - msg for msg in messages - if msg.message_type == "assistant_message" -] - -# Get all tool-related messages -tool_messages = [ - msg for msg in messages - if msg.message_type in ["tool_call_message", "tool_return_message"] -] - -# Get conversation history (user + assistant messages only) -conversation = [ - msg for msg in messages - if msg.message_type in ["user_message", "assistant_message"] -] -``` - - - -### Pagination - -Messages support cursor-based pagination: - - -```typescript TypeScript -// Get first page -let messages = await 
client.agents.messages.list("agent-id", { - limit: 100, -}); - -// Get next page using the last message ID -const lastMessageId = messages[messages.length - 1].id; -const nextPage = await client.agents.messages.list("agent-id", { - limit: 100, - before: lastMessageId, -}); -``` - -```python Python -# Get first page -messages = client.agents.messages.list( - agent_id="agent-id", - limit=100 -) - -# Get next page using the last message ID -last_message_id = messages[-1].id -next_page = client.agents.messages.list( - agent_id="agent-id", - limit=100, - before=last_message_id -) -``` - - -## Message Metadata Fields - -All message types include these common fields: - -- **`id`** - Unique identifier for the message -- **`date`** - ISO 8601 timestamp of when the message was created -- **`message_type`** - The discriminator field identifying the message type -- **`name`** - Optional name field (varies by message type) -- **`otid`** - Offline threading ID for message correlation -- **`sender_id`** - The ID of the sender (identity or agent ID) -- **`step_id`** - The step ID associated with this message -- **`is_err`** - Whether this message is part of an error step (debugging only) -- **`seq_id`** - Sequence ID for ordering -- **`run_id`** - The run ID associated with this message - -## Best Practices - -### 1. Use Type Discriminators - -Always check the `message_type` field to safely access type-specific fields: - - -```typescript TypeScript -if (message.messageType === "tool_call_message") { - // TypeScript now knows message has a toolCall field - console.log(message.toolCall.name); -} -``` - -```python Python -if message.message_type == "tool_call_message": - # Safe to access tool_call - print(message.tool_call.name) -``` - - -### 2. 
Handle Special User Messages - -When displaying conversations to end users, filter out internal messages: - -```python -def is_internal_message(msg): - """Check if a user message is internal (login, system_alert, etc.)""" - if msg.message_type != "user_message": - return False - - if not isinstance(msg.content, str): - return False - - try: - parsed = json.loads(msg.content) - return parsed.get("type") in ["login", "system_alert"] - except: - return False - -# Get user-facing messages only -display_messages = [ - msg for msg in messages - if not is_internal_message(msg) -] -``` - -### 3. Track Tool Execution - -Match tool calls with their returns using `tool_call_id`: - -```python -# Build a map of tool calls to their returns -tool_calls = { - msg.tool_call.tool_call_id: msg - for msg in messages - if msg.message_type == "tool_call_message" -} - -tool_returns = { - msg.tool_call_id: msg - for msg in messages - if msg.message_type == "tool_return_message" -} - -# Find failed tool calls -for call_id, call_msg in tool_calls.items(): - if call_id in tool_returns: - return_msg = tool_returns[call_id] - if return_msg.status == "error": - print(f"Tool {call_msg.tool_call.name} failed:") - print(f" {return_msg.tool_return}") -``` - -## See Also - -- [Human-in-the-Loop](/guides/agents/human_in_the_loop) - Using approval messages -- [Streaming Responses](/guides/agents/streaming) - Receiving messages in real-time -- [API Reference](/api-reference/agents/messages/list) - Full API documentation diff --git a/fern/pages/agents/overview.mdx b/fern/pages/agents/overview.mdx deleted file mode 100644 index 516d5932..00000000 --- a/fern/pages/agents/overview.mdx +++ /dev/null @@ -1,279 +0,0 @@ ---- -title: Building Stateful Agents with Letta -slug: guides/agents/overview ---- - - -**New to Letta?** If you haven't already, read [Core Concepts](/core-concepts) to understand how Letta's stateful agents are fundamentally different from traditional LLM APIs. 
- - -Letta agents can automatically manage long-term memory, load data from external sources, and call custom tools. -Unlike in other frameworks, Letta agents are stateful, so they keep track of historical interactions and reserve part of their context to read and write memories which evolve over time. - - - - - -Letta manages a reasoning loop for agents. At each agent step (i.e. iteration of the loop), the state of the agent is checkpointed and persisted to the database. - -You can interact with agents from a REST API, the ADE, and TypeScript / Python SDKs. -As long as they are connected to the same service, all of these interfaces can be used to interact with the same agents. - - -If you're interested in learning more about stateful agents, read our [blog post](https://www.letta.com/blog/stateful-agents). - - -## Agents vs Threads - -In Letta, you can think of an agent as a single entity that has a single message history which is treated as infinite. -The sequence of interactions the agent has experienced through its existence make up the agent's state (or memory). - -One distinction between Letta and other agent frameworks is that Letta does not have the notion of message *threads* (or *sessions*). -Instead, there are only *stateful agents*, which have a single perpetual thread (sequence of messages). - -The reason we use the term *agent* rather than *thread* is because Letta is based on the principle that **all agents interactions should be part of the persistent memory**, as opposed to building agent applications around ephemeral, short-lived interactions (like a thread or session). 
-```mermaid -%%{init: {'flowchart': {'rankDir': 'LR'}}}%% -flowchart LR - subgraph Traditional["Thread-Based Agents"] - direction TB - llm1[LLM] --> thread1["Thread 1 - -------- - Ephemeral - Session"] - llm1 --> thread2["Thread 2 - -------- - Ephemeral - Session"] - llm1 --> thread3["Thread 3 - -------- - Ephemeral - Session"] - end - - Traditional ~~~ Letta - - subgraph Letta["Letta Stateful Agents"] - direction TB - llm2[LLM] --> agent["Single Agent - -------- - Persistent Memory"] - agent --> db[(PostgreSQL)] - db -->|"Learn & Update"| agent - end - - class thread1,thread2,thread3 session - class agent agent -``` - -If you would like to create common starting points for new conversation "threads", we recommending using [agent templates](/guides/templates/overview) to create new agents for each conversation, or directly copying agent state from an existing agent. - -For multi-users applications, we recommend creating an agent per-user, though you can also have multiple users message a single agent (but it will be a single shared message history). - -## Create an agent - -To start creating agents, you can run a Letta server locally using **Letta Desktop**, deploy a server locally + remotely with **Docker**, or use **Letta Cloud**. See our [quickstart guide](/quickstart) for more information. 
- - -Assuming we're running a Letta server locally at `http://localhost:8283`, we can create a new agent via the REST API, Python SDK, or TypeScript SDK: - -```curl curl -curl -X POST http://localhost:8283/v1/agents/ \ - -H "Content-Type: application/json" \ - -d '{ - "memory_blocks": [ - { - "value": "The human'\''s name is Bob the Builder.", - "label": "human" - }, - { - "value": "My name is Sam, the all-knowing sentient AI.", - "label": "persona" - } - ], - "model": "openai/gpt-4o-mini", - "context_window_limit": 16000, - "embedding": "openai/text-embedding-3-small" -}' -``` -```python title="python" maxLines=50 -# install letta_client with `pip install letta-client` -from letta_client import Letta - -# create a client to connect to your local Letta server -client = Letta( - base_url="http://localhost:8283" -) - -# create an agent with two basic self-editing memory blocks -agent_state = client.agents.create( - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Bob the Builder." - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." - } - ], - model="openai/gpt-4o-mini", - context_window_limit=16000, - embedding="openai/text-embedding-3-small" -) - -# the AgentState object contains all the information about the agent -print(agent_state) -``` -```typescript TypeScript maxLines=50 -// install letta-client with `npm install @letta-ai/letta-client` -import { LettaClient } from '@letta-ai/letta-client' - -// create a client to connect to your local Letta server -const client = new LettaClient({ - baseUrl: "http://localhost:8283" -}); - -// create an agent with two basic self-editing memory blocks -const agentState = await client.agents.create({ - memoryBlocks: [ - { - label: "human", - value: "The human's name is Bob the Builder." - }, - { - label: "persona", - value: "My name is Sam, the all-knowing sentient AI." 
- } - ], - model: "openai/gpt-4o-mini", - contextWindowLimit: 16000, - embedding: "openai/text-embedding-3-small" -}); - -// the AgentState object contains all the information about the agent -console.log(agentState); -``` - -You can also create an agent without any code using the [Agent Development Environment (ADE)](/agent-development-environment). -All Letta agents are stored in a database on the Letta server, so you can access the same agents from the ADE, the REST API, the Python SDK, and the TypeScript SDK. - -The response will include information about the agent, including its `id`: -```json -{ - "id": "agent-43f8e098-1021-4545-9395-446f788d7389", - "name": "GracefulFirefly", - ... -} -``` - -Once an agent is created, you can message it: - -```curl curl -curl --request POST \ - --url http://localhost:8283/v1/agents/$AGENT_ID/messages \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [ - { - "role": "user", - "content": "hows it going????" - } - ] -}' -``` -```python title="python" maxLines=50 -# send a message to the agent -response = client.agents.messages.create( - agent_id=agent_state.id, - messages=[ - { - "role": "user", - "content": "hows it going????" - } - ] -) - -# the response object contains the messages and usage statistics -print(response) - -# if we want to print the usage stats -print(response.usage) - -# if we want to print the messages -for message in response.messages: - print(message) -``` -```typescript TypeScript maxLines=50 -// send a message to the agent -const response = await client.agents.messages.create( - agentState.id, { - messages: [ - { - role: "user", - content: "hows it going????" 
- } - ] - } -); - -// the response object contains the messages and usage statistics -console.log(response); - -// if we want to print the usage stats -console.log(response.usage) - -// if we want to print the messages -for (const message of response.messages) { - console.log(message); -} -``` - - -### Message Types -The `response` object contains the following attributes: -* `usage`: The usage of the agent after the message was sent (the prompt tokens, completition tokens, and total tokens) -* `message`: A list of `LettaMessage` objects, generated by the agent - -#### `LettaMessage` -The `LettaMessage` object is a simplified version of the `Message` object stored in the database backend. -Since a `Message` can include multiple events like a chain-of-thought and function calls, `LettaMessage` simplifies messages to have the following types: -* `reasoning_message`: The inner monologue (chain-of-thought) of the agent -* `tool_call_message`: An agent's tool (function) call -* `tool_call_return`: The result of executing an agent's tool (function) call -* `assistant_message`: An agent's response message (direct response in current architecture, or `send_message` tool call in legacy architectures) -* `system_message`: A system message (for example, an alert about the user logging in) -* `user_message`: A user message - - -In current Letta agents, `assistant_message` represents the agent's direct response. In legacy architectures (`memgpt_agent`, `memgpt_v2_agent`), it wraps the `send_message` tool call. - -If you prefer to see the raw tool call format in legacy agents, you can set `use_assistant_message` to `false` in the request `config` (see the [endpoint documentation](/api-reference/agents/messages/create)). 
- - -## Common agent operations -For more in-depth guide on the full set of Letta agent operations, check out our [API reference](/api-reference/overview), our extended [Python SDK](https://github.com/letta-ai/letta/blob/main/examples/docs/example.py) and [TypeScript SDK](https://github.com/letta-ai/letta/blob/main/examples/docs/node/example.ts) examples, as well as our other [cookbooks](/cookbooks). - -If you're using a self-hosted Letta server, you should set the **base URL** (`base_url` in Python, `baseUrl` in TypeScript) to the Letta server's URL (e.g. `http://localhost:8283`) when you create your client. See an example [here](/api-reference/overview). - -If you're using a self-hosted server, you can omit the token if you're not using [password protection](/guides/server/docker#password-protection-advanced). -If you are using password protection, set your **token** to the **password**. -If you're using Letta Cloud, you should set the **token** to your **Letta Cloud API key**. - -### Retrieving an agent's state -The agent's state is always persisted, so you can retrieve an agent's state by its ID. - - -The result of the call is an `AgentState` object: - - -### List agents -Replace `agent_id` with your actual agent ID. - - -The result of the call is a list of `AgentState` objects: - - -### Delete an agent -To delete an agent, you can use the `DELETE` endpoint with your `agent_id`: - diff --git a/fern/pages/agents/prebuilt_tools.mdx b/fern/pages/agents/prebuilt_tools.mdx deleted file mode 100644 index 749c7ad8..00000000 --- a/fern/pages/agents/prebuilt_tools.mdx +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Utilities -subtitle: Pre-built tools for web access, code execution, and data fetching -slug: guides/agents/prebuilt-tools ---- - -Letta provides pre-built tools that enable agents to search the web, execute code, and fetch webpage content. 
- -## Available Utilities - -### [Web Search](/guides/agents/web-search) - -Search the internet in real-time using [Exa](https://exa.ai)'s AI-powered search engine. - -```python -agent = client.agents.create( - tools=["web_search"], - memory_blocks=[{ - "label": "persona", - "value": "I use web_search for current events and external research." - }] -) -``` - -**Key features:** -- AI-powered semantic search -- Category filtering (news, research papers, PDFs, etc.) -- Domain filtering -- Date range filtering -- Highlights and AI-generated summaries - -**Setup:** Works out of the box on Letta Cloud. Self-hosted requires `EXA_API_KEY`. - -[Read full documentation →](/guides/agents/web-search) - ---- - -### [Code Interpreter](/guides/agents/run-code) - -Execute code in a secure sandbox with full network access. - -```python -agent = client.agents.create( - tools=["run_code"], - memory_blocks=[{ - "label": "persona", - "value": "I use Python for data analysis and API calls." - }] -) -``` - -**Key features:** -- Python with 191+ pre-installed packages (numpy, pandas, scipy, etc.) -- JavaScript, TypeScript, R, and Java support -- Full network access for API calls -- Fresh environment per execution (no state persistence) - -**Setup:** Works out of the box on Letta Cloud. Self-hosted requires `E2B_API_KEY`. - -[Read full documentation →](/guides/agents/run-code) - ---- - -### [Fetch Webpage](/guides/agents/fetch-webpage) - -Fetch and convert webpages to readable text/markdown. - -```python -agent = client.agents.create( - tools=["fetch_webpage"], - memory_blocks=[{ - "label": "persona", - "value": "I fetch and read webpages to answer questions." - }] -) -``` - -**Key features:** -- Converts HTML to clean markdown -- Extracts article content -- Multiple fallback extraction methods -- Optional Exa integration for enhanced extraction - -**Setup:** Works out of the box everywhere. Optional `EXA_API_KEY` for enhanced extraction. 
- -[Read full documentation →](/guides/agents/fetch-webpage) - ---- - -## Related Documentation - -- [Custom Tools](/guides/agents/custom-tools) -- [Tool Variables](/guides/agents/tool-variables) -- [Model Context Protocol](/guides/mcp/overview) diff --git a/fern/pages/agents/run_code.mdx b/fern/pages/agents/run_code.mdx deleted file mode 100644 index d387d2ad..00000000 --- a/fern/pages/agents/run_code.mdx +++ /dev/null @@ -1,258 +0,0 @@ ---- -title: Code Interpreter -subtitle: Execute code in a secure sandbox with full network access -slug: guides/agents/run-code ---- - -The `run_code` tool enables Letta agents to execute code in a secure sandboxed environment. Useful for data analysis, calculations, API calls, and programmatic computation. - - -On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments, you'll need to [configure an E2B API key](#self-hosted-setup). - - - -Each execution runs in a **fresh environment** - variables, files, and state do not persist between runs. - - -## Quick Start - - -```python Python -from letta import Letta - -client = Letta(token="LETTA_API_KEY") - -agent = client.agents.create( - model="openai/gpt-4o", - tools=["run_code"], - memory_blocks=[{ - "label": "persona", - "value": "I can run Python code for data analysis and API calls." - }] -) -``` - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -const agent = await client.agents.create({ - model: "openai/gpt-4o", - tools: ["run_code"], - memoryBlocks: [{ - label: "persona", - value: "I can run Python code for data analysis and API calls." 
- }] -}); -``` - - -## Tool Parameters - -| Parameter | Type | Options | Description | -|-----------|------|---------|-------------| -| `code` | `str` | Required | The code to execute | -| `language` | `str` | `python`, `js`, `ts`, `r`, `java` | Programming language | - -## Return Format - -```json -{ - "results": ["Last expression value"], - "logs": { - "stdout": ["Print statements"], - "stderr": ["Error output"] - }, - "error": "Error details if execution failed" -} -``` - -**Output types:** -- `results[]`: Last expression value (Jupyter-style) -- `logs.stdout`: Print statements and standard output -- `logs.stderr`: Error messages -- `error`: Present if execution failed - -## Supported Languages - -| Language | Key Limitations | -|----------|-----------------| -| **Python** | None - full ecosystem available | -| **JavaScript** | No npm packages - built-in Node modules only | -| **TypeScript** | No npm packages - built-in Node modules only | -| **R** | No tidyverse - base R only | -| **Java** | JShell-style execution - no traditional class definitions | - -### Python - -Full Python ecosystem with common packages pre-installed: - -- **Data**: numpy, pandas, scipy, scikit-learn -- **Web**: requests, aiohttp, beautifulsoup4 -- **Utilities**: matplotlib, PyYAML, Pillow - -Check available packages: -```python -import pkg_resources -print([d.project_name for d in pkg_resources.working_set]) -``` - -### JavaScript & TypeScript - -No npm packages available - only built-in Node modules. - -```javascript -// Works -const fs = require('fs'); -const http = require('http'); - -// Fails -const axios = require('axios'); -``` - -### R - -Base R only - no tidyverse packages. - -```r -# Works -mean(c(1, 2, 3)) - -# Fails -library(ggplot2) -``` - -### Java - -JShell-style execution - statement-level only. 
- -```java -// Works -System.out.println("Hello"); -int x = 42; - -// Fails -public class Main { - public static void main(String[] args) { } -} -``` - -## Network Access - -The sandbox has full network access for HTTP requests, API calls, and DNS resolution. - -```python -import requests - -response = requests.get('https://api.github.com/repos/letta-ai/letta') -data = response.json() -print(f"Stars: {data['stargazers_count']}") -``` - -## No State Persistence - -Variables, files, and state do not carry over between executions. Each `run_code` call is completely isolated. - -```python -# First execution -x = 42 - -# Second execution (separate run_code call) -print(x) # Error: NameError: name 'x' is not defined -``` - -**Implications:** -- Must re-import libraries each time -- Files written to disk are lost -- Cannot build up state across executions - -## Self-Hosted Setup - -For self-hosted servers, configure an E2B API key. [E2B](https://e2b.dev) provides the sandbox infrastructure. - - -```bash Docker -docker run \ - -e E2B_API_KEY="your_e2b_api_key" \ - letta/letta:latest -``` - -```yaml Docker Compose -services: - letta: - environment: - - E2B_API_KEY=your_e2b_api_key -``` - -```bash Server -export E2B_API_KEY="your_e2b_api_key" -letta server -``` - -```python Per-Agent -agent = client.agents.create( - tools=["run_code"], - tool_env_vars={ - "E2B_API_KEY": "your_e2b_api_key" - } -) -``` - - -## Common Patterns - -### Data Analysis -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["run_code"], - memory_blocks=[{ - "label": "persona", - "value": "I use Python with pandas and numpy for data analysis." - }] -) -``` - -### API Integration -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["run_code", "web_search"], - memory_blocks=[{ - "label": "persona", - "value": "I fetch data from APIs using run_code and search docs with web_search." 
- }] -) -``` - -### Statistical Analysis -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["run_code"], - memory_blocks=[{ - "label": "persona", - "value": "I perform statistical analysis using scipy and numpy." - }] -) -``` - -## When to Use - -| Use Case | Tool | Why | -|----------|------|-----| -| Data analysis | `run_code` | Full Python data stack | -| Math calculations | `run_code` | Programmatic computation | -| Live API data | `run_code` | Network + processing | -| Web scraping | `run_code` | requests + BeautifulSoup | -| Simple search | `web_search` | Purpose-built | -| Persistent data | Archival memory | State persistence | - -## Related Documentation - -- [Utilities Overview](/guides/agents/prebuilt-tools) -- [Web Search](/guides/agents/web-search) -- [Fetch Webpage](/guides/agents/fetch-webpage) -- [Custom Tools](/guides/agents/custom-tools) -- [Tool Variables](/guides/agents/tool-variables) diff --git a/fern/pages/agents/shared-memory-blocks-guide.mdx b/fern/pages/agents/shared-memory-blocks-guide.mdx deleted file mode 100644 index c30692af..00000000 --- a/fern/pages/agents/shared-memory-blocks-guide.mdx +++ /dev/null @@ -1,837 +0,0 @@ ---- -title: Shared Memory Blocks Guide -subtitle: Complete guide to using shared memory for multi-agent coordination -slug: guides/agents/shared-memory-blocks ---- - -Shared memory blocks enable multiple agents to access and update the same memory, creating powerful multi-agent systems with seamless coordination. - -## Overview - -**Shared memory blocks** allow you to attach the same memory block to multiple agents. When one agent updates the block, all other agents with access immediately see the changes. 
This enables: - -- **Real-time coordination** without explicit agent-to-agent messaging -- **Consistent information** across teams and departments -- **Hierarchical access control** based on roles and responsibilities -- **Privacy boundaries** for sensitive information -- **Knowledge sharing** across specialized agents - - -Shared memory blocks are different from agent-to-agent messaging (like `send_message_to_agent_async`). With shared memory, agents coordinate **asynchronously** through shared state rather than direct communication. - - -## Core Concepts - -### What is a Shared Memory Block? - -A memory block becomes "shared" when you attach it to multiple agents using the same `block_id`. All agents with access see the same content in real-time. - -```python -from letta import Letta - -client = Letta() - -# Step 1: Create a memory block -shared_block = client.blocks.create( - label="team_knowledge", - description="Shared knowledge base for the team", - value="Team policies and procedures...", - limit=5000 -) - -# Step 2: Attach to multiple agents -agent1 = client.agents.create( - name="Agent_1", - block_ids=[shared_block.id], # Attach shared block - # ... other config -) - -agent2 = client.agents.create( - name="Agent_2", - block_ids=[shared_block.id], # Same block ID = shared memory - # ... other config -) - -# Now agent1 and agent2 share the same memory block! -``` - -### Block Types - -| Type | Description | Use Case | -|---|---|---| -| **Read-Only** | Agents can read but not modify | Company policies, reference data | -| **Read/Write** | Agents can read and update | Task queues, shared notes | -| **Private** | Single agent only | Personal work logs, private notes | - -### Access Patterns - -Letta supports multiple access patterns for organizing shared memory: - -1. **Hierarchical**: Tier 1 < Tier 2 < Tier 3 (access increases up the hierarchy) -2. **Team-Based**: All team members share the same blocks -3. 
**Overlapping**: Each agent has a unique combination of blocks -4. **Organizational**: Department → Cross-Department → Executive levels - -## Architecture Patterns - -### Pattern 1: Hierarchical Access (Support Tiers) - -``` -Tier 1 Agents → company_policies [R] -Tier 2 Agents → company_policies [R], escalation_procedures [R] -Tier 3 Agents → company_policies [R], escalation_procedures [R], team_metrics [R/W] -``` - -**Use Cases:** -- Customer support with tier levels -- Knowledge bases with sensitivity levels -- Organizations with clearance levels - -**Example:** [Read-Only Organizational Knowledge Tutorial](/cookbooks/shared-memory-read-only) - -### Pattern 2: Team Coordination (Shared Queues) - -``` -All Team Members → task_queue [R/W], completed_work [R/W] -Supervisor Only → team_metrics [R/W] -Each Worker → private_work_log [R/W] -``` - -**Use Cases:** -- Task coordination across workers -- Project management teams -- Shared deliverables tracking - -**Example:** [Task Coordination Tutorial](/cookbooks/shared-memory-task-coordination) - -### Pattern 3: Specialized Agents (Overlapping Access) - -``` -Coordinator → ALL blocks (user_profile, preferences, interaction_history, calendar, financial) -Email Agent → user_profile, preferences, interaction_history, calendar -Research Agent → user_profile, preferences, interaction_history -Calendar Agent → user_profile, preferences, calendar -Finance Agent → user_profile, preferences, financial -``` - -**Use Cases:** -- Personal AI assistant networks -- Specialized service agents -- Multi-domain customer support - -**Example:** [Multi-Agent User Assistant Tutorial](/cookbooks/shared-memory-user-assistant) - -### Pattern 4: Enterprise Hierarchy (Departments) - -``` -Company-Wide [R] → ALL agents (mission, policies) -Department Blocks [R/W] → Department members only -Cross-Dept Block [R/W] → All directors + CEO -Executive Dashboard [R/W] → CEO only -``` - -**Use Cases:** -- Enterprise organizations -- Multi-department 
companies -- Regulated industries with compliance requirements - -**Example:** [Enterprise Multi-Team Tutorial](/cookbooks/shared-memory-enterprise) - -## Best Practices - -### 1. Use Read-Only Blocks for Critical Information - -Protect policies, procedures, and reference data from accidental modification: - -```python -company_policies = client.blocks.create( - label="company_policies", - value="Our company policies...", - # Read-only ensures consistency -) -``` - - -Even though Letta doesn't currently enforce read-only at the API level, agents will respect read-only semantics and refuse to modify these blocks when instructed not to in their persona. - - -### 2. Implement the Principle of Least Privilege - -Only give agents access to blocks they need: - -```python -# ❌ Bad: Sales agent with access to HR data -sales_agent = client.agents.create( - block_ids=[sales_knowledge.id, hr_employee_data.id] # Too much access -) - -# ✓ Good: Sales agent with only sales data -sales_agent = client.agents.create( - block_ids=[sales_knowledge.id] # Appropriate access -) -``` - -### 3. Use Clear Naming Conventions - -Make block purposes obvious: - -```python -# ✓ Good names -"company_policies" # Clear scope and content -"sales_team_knowledge" # Clear ownership -"cross_dept_projects" # Clear purpose -"ceo_executive_dashboard" # Clear access level - -# ❌ Bad names -"data" # Too generic -"block1" # Not descriptive -"temp" # Purpose unclear -``` - -### 4. 
Set Appropriate Character Limits - -Balance between enough space and memory constraints: - -```python -# Reference data: smaller -company_policies = client.blocks.create( - limit=5000 # Policies don't change often -) - -# Active coordination: larger -task_queue = client.blocks.create( - limit=10000 # Tasks accumulate over time -) - -# Detailed logs: largest -interaction_history = client.blocks.create( - limit=12000 # Many interactions to track -) -``` - - -If a block frequently hits its character limit, consider archiving old content or splitting into multiple blocks (e.g., `current_tasks` vs `completed_tasks`). - - -### 5. Document Block Access in Agent Personas - -Make agents aware of their access: - -```python -agent = client.agents.create( - memory_blocks=[{ - "label": "persona", - "value": """I am a Sales Representative. - -My access: -- company_policies (read-only): Company-wide policies -- sales_knowledge (read/write): Shared with sales team -- my_leads (private): My personal lead tracking - -I do NOT have access to: -- engineering_specs (Engineering team only) -- hr_employee_data (HR team only) -""" - }] -) -``` - -### 6. Use Descriptive Block Descriptions - -Help with debugging and management: - -```python -block = client.blocks.create( - label="sales_knowledge", - description="Sales team knowledge base: pricing, playbooks, targets. Read/write access for Sales Director, Rep 1, Rep 2.", - # Good description includes: content, access, and purpose -) -``` - -## Common Use Cases - -### Use Case 1: Customer Support with Tiers - -**Problem:** Support agents at different levels need different information. 
- -**Solution:** Hierarchical blocks with increasing access - -``` -support_tier1/ → Basic policies -support_tier2/ → Advanced troubleshooting + escalation procedures -support_tier3/ → Full system access + team metrics -``` - -**Benefits:** -- Consistent policy information across all tiers -- Sensitive escalation procedures protected -- Supervisors track team performance privately - -### Use Case 2: Project Management Team - -**Problem:** Multiple workers need to coordinate on tasks. - -**Solution:** Shared task queue + completion log - -``` -task_queue (R/W) → All team members claim and update tasks -completed_work (R/W) → All team members share findings -team_metrics (R/W) → Supervisor only tracks performance -``` - -**Benefits:** -- Real-time task claiming without conflicts -- Knowledge sharing through completed work -- Supervisor oversight without micromanagement - -### Use Case 3: Personal AI Assistant Network - -**Problem:** User needs specialized agents that understand full context. - -**Solution:** Overlapping block access with privacy boundaries - -``` -Universal: user_profile, user_preferences → All agents -Coordination: interaction_history → Coordinator, Email, Research -Domain-specific: calendar_data → Calendar, Email agents only -Restricted: financial_data → Finance agent only -``` - -**Benefits:** -- Seamless handoffs between specialists -- Consistent user experience -- Privacy protection for sensitive data - -### Use Case 4: Enterprise Organization - -**Problem:** Large company with departments needs coordination. 
- -**Solution:** Multi-tier hierarchy with isolation - -``` -Company-wide (R) → All employees see mission/policies -Department (R/W) → Each dept has private knowledge -Cross-dept (R/W) → Directors coordinate projects -Executive (R/W) → CEO tracks company metrics -``` - -**Benefits:** -- Department autonomy and isolation -- Async cross-department coordination -- Executive oversight without bottlenecks -- Compliance with data privacy regulations - -## API Reference - -### Creating Shared Blocks - -```python -block = client.blocks.create( - label="block_name", # Required: identifier - description="What this is", # Recommended: for management - value="Initial content", # Required: starting content - limit=5000 # Optional: character limit -) -``` - -### Attaching to Agents (at Creation) - -```python -agent = client.agents.create( - name="Agent_Name", - block_ids=[block1.id, block2.id], # Attach existing blocks - memory_blocks=[ # Create new private blocks - {"label": "persona", "value": "..."} - ], - # ... 
other config -) -``` - -### Attaching to Existing Agents - -```python -# Attach a block to an existing agent -client.agents.blocks.attach( - agent_id=agent.id, - block_id=block.id -) - -# Detach a block from an agent -client.agents.blocks.detach( - agent_id=agent.id, - block_id=block.id -) -``` - -### Listing Blocks - -Find blocks across your project with optional filtering: - - -```python Python -# List all blocks in project -all_blocks = client.blocks.list() - -# Filter by label -team_blocks = client.blocks.list(label="team_knowledge") - -# Search by label text -search_results = client.blocks.list(label_search="sales") -``` -```typescript TypeScript -// List all blocks in project -const allBlocks = await client.blocks.list(); - -// Filter and search -const teamBlocks = await client.blocks.list({ - label: "team_knowledge", - labelSearch: "sales" -}); -``` - - -### Retrieving a Block - -Get complete block information by ID: - - -```python Python -block = client.blocks.retrieve(block.id) -print(f"Block: {block.label}") -print(f"Value: {block.value}") -``` -```typescript TypeScript -const block = await client.blocks.retrieve(block.id); -console.log(`Block: ${block.label}`); -console.log(`Value: ${block.value}`); -``` - - -### Modifying Blocks Directly - -Update block content without going through an agent. Useful for external scripts syncing data to agents: - - -```python Python -# Update block content - completely replaces the value -client.blocks.modify( - block.id, - value="Updated team knowledge: New procedures..." -) - -# Update multiple properties -client.blocks.modify( - block.id, - value="New content", - limit=8000, - description="Updated description" -) - -# Make block read-only -client.blocks.modify(block.id, read_only=True) -``` -```typescript TypeScript -// Update block content - completely replaces the value -await client.blocks.modify(block.id, { - value: "Updated team knowledge: New procedures..." 
-}); - -// Update multiple properties -await client.blocks.modify(block.id, { - value: "New content", - limit: 8000, - description: "Updated description" -}); - -// Make block read-only -await client.blocks.modify(block.id, { readOnly: true }); -``` - - - -**Setting `value` completely replaces the entire block content** - it is not an append operation. When you modify a shared block directly, all agents with access will see the changes immediately. - -**Race condition risk**: If two processes (agents or external scripts) modify the same block concurrently, the last write wins and completely overwrites all earlier changes. To avoid data loss: -- Set blocks to **read-only** if you don't want agents or other processes to modify them -- Only allow direct modifications in controlled scenarios where overwriting is acceptable -- Ensure your application logic accounts for the fact that block updates are full replacements, not merges - - -### Deleting Blocks - -Remove blocks when no longer needed. 
This detaches the block from all agents: - - -```python Python -client.blocks.delete(block_id=block.id) -``` -```typescript TypeScript -await client.blocks.delete(block.id); -``` - - -### Agent-Scoped Operations - -#### List an Agent's Blocks - -See all memory blocks attached to a specific agent: - - -```python Python -# List all blocks for an agent -agent_blocks = client.agents.blocks.list(agent_id=agent.id) - -# With pagination -agent_blocks = client.agents.blocks.list( - agent_id=agent.id, - limit=10, - order="asc" -) - -for block in agent_blocks: - print(f"{block.label}: {block.value[:50]}...") -``` -```typescript TypeScript -// List all blocks for an agent -const agentBlocks = await client.agents.blocks.list(agent.id); - -// With pagination -const agentBlocksPaginated = await client.agents.blocks.list(agent.id, { - limit: 10, - order: "asc" -}); - -for (const block of agentBlocks) { - console.log(`${block.label}: ${block.value.slice(0, 50)}...`); -} -``` - - -#### Retrieve Agent's Block by Label - -Get a specific block from an agent using its label instead of ID: - - -```python Python -# Get agent's human block -human_block = client.agents.blocks.retrieve( - agent_id=agent.id, - block_label="human" -) -print(human_block.value) - -# Get shared task queue from specific agent -task_queue = client.agents.blocks.retrieve( - agent_id=worker_agent.id, - block_label="task_queue" -) -``` -```typescript TypeScript -// Get agent's human block -const humanBlock = await client.agents.blocks.retrieve( - agent.id, - "human" -); -console.log(humanBlock.value); - -// Get shared task queue from specific agent -const taskQueue = await client.agents.blocks.retrieve( - workerAgent.id, - "task_queue" -); -``` - - -#### Modify Agent's Block by Label - -Update a specific agent's block without needing the block ID: - - -```python Python -# Update agent's knowledge about the human -client.agents.blocks.modify( - agent_id=agent.id, - block_label="human", - value="Updated user 
information: Alice, prefers email over chat" -) - -# Update shared block via specific agent -client.agents.blocks.modify( - agent_id=worker.id, - block_label="task_queue", - value="Updated task queue with new items..." -) -``` -```typescript TypeScript -// Update agent's knowledge about the human -await client.agents.blocks.modify(agent.id, "human", { - value: "Updated user information: Alice, prefers email over chat" -}); - -// Update shared block via specific agent -await client.agents.blocks.modify(worker.id, "task_queue", { - value: "Updated task queue with new items..." -}); -``` - - -### Inspecting Block Usage - -See which agents have a block attached: - - -```python Python -# List all agents that use this block -agents_with_block = client.blocks.agents.list(block_id=block.id) -print(f"Used by {len(agents_with_block)} agents:") -for agent in agents_with_block: - print(f" - {agent.name}") - -# With pagination -agents_page = client.blocks.agents.list( - block_id=block.id, - limit=10, - order="asc" -) -``` -```typescript TypeScript -// List all agents that use this block -const agentsWithBlock = await client.blocks.agents.list(block.id); -console.log(`Used by ${agentsWithBlock.length} agents:`); -for (const agent of agentsWithBlock) { - console.log(` - ${agent.name}`); -} - -// With pagination -const agentsPage = await client.blocks.agents.list(block.id, { - limit: 10, - order: "asc" -}); -``` - - -### Updating Blocks via Agents - -Agents update blocks using memory tools during conversations: - - -```python Python -# Agent updates shared block content -client.agents.messages.create( - agent_id=agent.id, - messages=[{ - "role": "user", - "content": "Update the task queue to mark task-001 as complete" - }] -) - -# Agent uses core_memory_replace or core_memory_append tools -# Changes are immediately visible to all agents with access -``` -```typescript TypeScript -// Agent updates shared block content -await client.agents.messages.create(agent.id, { - messages: [{ 
- role: "user", - content: "Update the task queue to mark task-001 as complete" - }] -}); - -// Agent uses core_memory_replace or core_memory_append tools -// Changes are immediately visible to all agents with access -``` - - -## Troubleshooting - -### Problem: Agent Can't See Block Updates - -**Symptoms:** Agent reads old content after another agent updated it. - -**Solutions:** -1. Verify both agents have the same `block_id` attached -2. Check that updates are being committed (agent finished its turn) -3. Ensure character limit hasn't been exceeded (updates may fail silently) - -```python -# Debug: Check which agents share the block -block_info = client.blocks.retrieve(block_id=block.id) -print(f"Agents with access: {block_info.agent_ids}") -``` - -### Problem: Block Character Limit Exceeded - -**Symptoms:** Updates not applying, content truncated. - -**Solutions:** -1. Increase block limit: `client.blocks.update(block_id=block.id, limit=10000)` -2. Archive old content: Move completed items to a separate block -3. Summarize content: Have an agent periodically summarize and condense - -```python -# Check current usage -block = client.blocks.retrieve(block_id=block.id) -print(f"Characters: {len(block.value)} / {block.limit}") -``` - -### Problem: Privacy Violation - -**Symptoms:** Agent accessing data it shouldn't see. - -**Solutions:** -1. Review block attachments: `client.agents.retrieve(agent_id).block_ids` -2. Detach inappropriate blocks: `client.agents.blocks.detach()` -3. Update agent persona to clarify access boundaries -4. Consider splitting one block into multiple with different access - -### Problem: Race Conditions on Concurrent Updates - -**Symptoms:** Two agents try to claim the same task, conflicts occur. - -**Solutions:** -1. Design blocks to minimize conflicts (separate sections for each agent) -2. Use timestamps and agent IDs in updates -3. Implement retry logic for failed updates -4. 
Consider optimistic concurrency control - -```python -# Good: Each agent updates their own section -""" -TASK-001: - Status: In Progress - Claimed by: Worker_1 - Timestamp: 2024-10-08 14:30 - -TASK-002: - Status: In Progress - Claimed by: Worker_2 - Timestamp: 2024-10-08 14:31 -""" -``` - -## Performance Considerations - -### Block Size and Agent Performance - -- **Smaller blocks (<5K chars)**: Faster loading, more focused context -- **Larger blocks (>10K chars)**: More context but slower processing -- **Optimal**: Keep blocks focused on single purpose, split large blocks - -### Number of Blocks per Agent - -- **Recommended**: 3-7 blocks per agent -- **Each block adds**: Context in agent's working memory -- **Too many blocks**: May dilute agent focus -- **Too few blocks**: May limit coordination capabilities - -### Update Frequency - -- **High-frequency updates** (many agents, frequent changes): Consider separate blocks to reduce contention -- **Low-frequency updates** (policies, references): Larger consolidated blocks are fine - -## Security and Compliance - -### Data Privacy - -Shared memory blocks enable compliance with data privacy regulations: - -```python -# GDPR/HIPAA Example: Isolate sensitive data -hr_employee_data = client.blocks.create( - label="hr_employee_data", - description="CONFIDENTIAL - HR Department only. Contains PII." 
-) - -# Only attach to authorized agents -hr_director = client.agents.create( - block_ids=[hr_employee_data.id] # Only HR has access -) - -# Sales/Engineering agents do NOT get access -sales_agent = client.agents.create( - block_ids=[sales_knowledge.id] # No HR data access -) -``` - -### Audit Trail - -Track block access for compliance: - -```python -# Check block usage -block_info = client.blocks.retrieve(block_id=sensitive_block.id) -print(f"Accessed by: {block_info.agent_ids}") - -# Log all agents with access -for agent_id in block_info.agent_ids: - agent = client.agents.retrieve(agent_id=agent_id) - print(f" {agent.name} - {agent.created_at}") -``` - -### Access Revocation - -Remove access when no longer needed: - -```python -# Employee leaves company - revoke agent access -client.agents.blocks.detach( - agent_id=former_employee_agent.id, - block_id=company_confidential.id -) - -# Or delete the agent entirely -client.agents.delete(agent_id=former_employee_agent.id) -``` - -## Tutorials - -Learn shared memory patterns through hands-on tutorials: - - - -Build a hierarchical support team with shared company policies - - - -Create a data analysis team with shared task queues - - - -Build specialized agents with overlapping block access - - - -Implement a complete enterprise with departments and hierarchies - - - -## Related Guides - - - -Understanding memory blocks and core memory - - - -Overview of multi-agent architectures in Letta - - - -Alternative coordination pattern using async messaging - - - -Complete API documentation for blocks and memory - - - -## Key Takeaways - -✓ **Shared memory blocks** enable seamless multi-agent coordination without explicit messaging -✓ **Access patterns** range from simple (all agents) to complex (hierarchical organizations) -✓ **Privacy boundaries** protect sensitive data while enabling collaboration -✓ **Real-time sync** ensures all agents see updates immediately -✓ **Scales** from 2 agents to enterprise systems with 10+ 
agents -✓ **Complements** agent-to-agent messaging for complete multi-agent systems - -Shared memory blocks are a powerful primitive for building sophisticated multi-agent systems. Start with simple patterns (Tutorial 1) and progress to complex architectures (Tutorial 4) as your needs grow. diff --git a/fern/pages/agents/sleep_time_agents.mdx b/fern/pages/agents/sleep_time_agents.mdx deleted file mode 100644 index 5917bc41..00000000 --- a/fern/pages/agents/sleep_time_agents.mdx +++ /dev/null @@ -1,125 +0,0 @@ ---- -title: Sleep-time Agents -subtitle: Based on the new sleep-time compute research paper -slug: guides/agents/architectures/sleeptime ---- - - -Sleep-time agents are experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta). - - - -To learn more about sleep-time compute, check out our [blog](https://www.letta.com/blog/sleep-time-compute) and [research paper](https://arxiv.org/abs/2504.13171). - - - - - - -In Letta, you can create special **sleep-time agents** that share the memory of your primary agents, but run in the background and can modify the memory asynchronously. You can think of sleep-time agents as a special form of multi-agent architecture, where all agents in the system share one or more memory blocks. A single agent can have one or more associated sleep-time agents to process data such as the conversation history or data sources to manage the memory blocks of the primary agent. - -To enable sleep-time agents for your agent, set `enableSleeptime: true` when creating your agent. This will automatically create: -* A primary agent with tools for `conversation_search` and `archival_memory_search`. This is your "main" agent that you configure and interact with. -* A sleep-time agent with tools to manage the memory blocks of the primary agent. - -## Background: Memory Blocks -Sleep-time agents specialize in generating *learned context*. Given some original context (e.g. 
the conversation history, a set of files) the sleep-time agent will reflect on the original context to iteratively derive a learned context. The learned context will reflect the most important pieces of information or insights from the original context. - -In Letta, the learned context is saved in a memory block. A memory block represents a labeled section of the context window with an associated character limit. Memory blocks can be shared between multiple agents. A sleep-time agent will write the learned context to a memory block, which can also be shared with other agents that could benefit from those learnings. - -Memory blocks can be access directly through the API to be updated, retrieved, or deleted. - - -```typescript TypeScript -// get a block by label -const block = await client.agents.blocks.retrieve(agentId, "persona"); - -// get a block by ID -const block = await client.blocks.retrieve(blockId); -``` -```python title="python" -# get a block by label -block = client.agents.blocks.retrieve(agent_id=agent_id, block_label="persona") - -# get a block by ID -block = client.blocks.retrieve(block_id=block_id) -``` - - -When sleep-time is enabled for an agent, a sleep-time agent is created to manage the memory blocks of the primary agent. The sleep-time agent runs in the background and can modify the memory blocks asynchronously. The sleep-time agent generates learned context from the conversation history to update the memory blocks of the primary agent. - -## Sleep-time agent for conversation - - - - -When sleep-time is enabled, a primary agent and a sleep-time agent are created as part of a multi-agent group under the hood. The sleep-time agent is responsible for generating learned context from the conversation history to update the memory blocks of the primary agent. The group ensures that for every `N` steps taken by the primary agent, the sleep-time agent is invoked with data containing new messages in the primary agent's message history. 
- - - -### Configuring the frequency of sleep-time updates -The sleep-time agent will be triggered every N-steps (default `5`) to update the memory blocks of the primary agent. You can configure the frequency of updates by setting the `sleeptime_agent_frequency` parameter when creating the agent. - - -```typescript TypeScript maxLines=50 -import { LettaClient, SleeptimeManagerUpdate } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// create a sleep-time-enabled agent -const agent = await client.agents.create({ - memoryBlocks: [ - { value: "", label: "human" }, - { value: "You are a helpful assistant.", label: "persona" } - ], - model: "anthropic/claude-3-7-sonnet-20250219", - embedding: "openai/text-embedding-3-small", - enableSleeptime: true -}); -console.log(`Created agent id ${agent.id}`); - -// get the multi-agent group -const groupId = agent.multiAgentGroup.id; -const currentFrequency = agent.multiAgentGroup.sleeptimeAgentFrequency; -console.log(`Group id: ${groupId}, frequency: ${currentFrequency}`); - -// update the frequency to every 2 steps -const group = await client.groups.modify(groupId, { - managerConfig: { - sleeptimeAgentFrequency: 2 - } as SleeptimeManagerUpdate -}); -``` -```python title="python" maxLines=50 -from letta_client import Letta -from letta_client.types import SleeptimeManagerUpdate - -client = Letta(token="LETTA_API_KEY") - -# create a sleep-time-enabled agent -agent = client.agents.create( - memory_blocks=[ - {"value": "", "label": "human"}, - {"value": "You are a helpful assistant.", "label": "persona"}, - ], - model="anthropic/claude-3-7-sonnet-20250219", - embedding="openai/text-embedding-3-small", - enable_sleeptime=True, -) -print(f"Created agent id {agent.id}") - -# get the multi-agent group -group_id = agent.multi_agent_group.id -current_frequence = agent.multi_agent_group.sleeptime_agent_frequency -print(f"Group id: {group_id}, frequency: {current_frequence}") - -# update the 
frequency to every 2 steps -group = client.groups.modify( - group_id=group_id, - manager_config=SleeptimeManagerUpdate( - sleeptime_agent_frequency=2 - ), -) -``` - -We recommend keeping the frequency relatively high (e.g. 5 or 10) as triggering the sleep-time agent too often can be expensive (due to high token usage) and has diminishing returns. diff --git a/fern/pages/agents/tool_rules.mdx b/fern/pages/agents/tool_rules.mdx deleted file mode 100644 index c8f4a8d4..00000000 --- a/fern/pages/agents/tool_rules.mdx +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: Creating Tool Rules -slug: guides/agents/tool-rules ---- - -Tool rules allows developer to define constrains on their tools, such as requiring that a tool terminate agent execution or be followed by another tool. - - -```mermaid -flowchart LR - subgraph init["InitToolRule"] - direction LR - start((Start)) --> init_tool["must_run_first"] - init_tool --> other1["...other tools..."] - end - - subgraph terminal["TerminalToolRule"] - direction LR - other2["...other tools..."] --> term_tool["terminal_tool"] --> stop1((Stop)) - end - - subgraph sequence["ChildToolRule (children)"] - direction LR - parent_tool["parent_tool"] --> child1["child_tool_1"] - parent_tool --> child2["child_tool_2"] - parent_tool --> child3["child_tool_3"] - end - - classDef stop fill:#ffcdd2,stroke:#333 - classDef start fill:#c8e6c9,stroke:#333 - class stop1 stop - class start start -``` - - -Letta currently supports the following tool rules (with more being added): - -* `TerminalToolRule(tool_name=...)` - * If the tool is called, the agent ends execution -* `InitToolRule(tool_name=...)` - * The tool must be called first when an agent is run -* `ChildToolRule(tool_name=..., children=[...])` - * If the tool is called, it must be followed by one of the tools specified in `children` -* `ParentToolRule(tool_name=..., children=[...])` - * The tool must be called before the tools specified in `children` can be called -* 
`ConditionalToolRule(tool_name=..., child_output_mapping={...})` - * If the tool is called, it must be followed by one of the tools specified in `children` based off the tool's output -* `ContinueToolRule(tool_name=...)` - * If the tool is called, the agent must continue execution -* `MaxCountPerStepToolRule(tool_name=..., max_count_limit=...)` - * The tool cannot be called more than `max_count_limit` times in a single step - -## Default tool rules - -Depending on your agent configuration, there may be default tool rules applied to improve performance. - -## Tool rule examples - -For example, you can ensure that the agent will stop execution after the `roll_d20` tool is called by specifying tool rules in the agent creation: - -```typescript TypeScript {6-11} -// create a new agent -const agentState = await client.createAgent({ - // create the agent with an additional tool - tools: [tool.name], - // add tool rules that terminate execution after specific tools - toolRules: [ - // exit after roll_d20 is called - {toolName: tool.name, type: "exit_loop"}, - ], -}); - -console.log(`Created agent with name ${agentState.name} with tools ${agentState.tools}`); -``` -```python Python {6-11} -# create a new agent -agent_state = client.create_agent( - # create the agent with an additional tool - tools=[tool.name], - # add tool rules that terminate execution after specific tools - tool_rules=[ - # exit after roll_d20 is called - TerminalToolRule(tool_name=tool.name, type="exit_loop"), - ], -) - -print(f"Created agent with name {agent_state.name} with tools {agent_state.tools}") -``` - - -You can see a full working example of tool rules [here](https://github.com/letta-ai/letta/blob/0.5.2/examples/tool_rule_usage.py). 
diff --git a/fern/pages/agents/web_search.mdx b/fern/pages/agents/web_search.mdx deleted file mode 100644 index 30dc9f00..00000000 --- a/fern/pages/agents/web_search.mdx +++ /dev/null @@ -1,342 +0,0 @@ ---- -title: Web Search -subtitle: Search the internet in real-time with AI-powered search -slug: guides/agents/web-search ---- - -The `web_search` tool enables Letta agents to search the internet for current information, research, and general knowledge using [Exa](https://exa.ai)'s AI-powered search engine. - - -On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments, you'll need to [configure an Exa API key](#self-hosted-setup). - - -## Quick Start - -### Adding Web Search to an Agent - - -```python Python -from letta import Letta - -client = Letta(token="LETTA_API_KEY") - -agent = client.agents.create( - model="openai/gpt-4o", - embedding="openai/text-embedding-3-small", - tools=["web_search"], - memory_blocks=[ - { - "label": "persona", - "value": "I'm a research assistant who uses web search to find current information and cite sources." - } - ] -) -``` - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -const agent = await client.agents.create({ - model: "openai/gpt-4o", - embedding: "openai/text-embedding-3-small", - tools: ["web_search"], - memoryBlocks: [ - { - label: "persona", - value: "I'm a research assistant who uses web search to find current information and cite sources." - } - ] -}); -``` - - -### Usage Example - -```python -response = client.agents.messages.create( - agent_id=agent.id, - messages=[ - { - "role": "user", - "content": "What are the latest developments in agent-based AI systems?" - } - ] -) -``` - -Your agent can now choose to use `web_search` when it needs current information. - -## Self-Hosted Setup - -For self-hosted Letta servers, you'll need an Exa API key. - -### Get an API Key - -1. 
Sign up at [dashboard.exa.ai](https://dashboard.exa.ai/) -2. Copy your API key -3. See [Exa pricing](https://docs.exa.ai) for rate limits and costs - -### Configuration Options - - -```bash Docker -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - -e OPENAI_API_KEY="your_openai_key" \ - -e EXA_API_KEY="your_exa_api_key" \ - letta/letta:latest -``` - -```yaml Docker Compose -version: '3.8' -services: - letta: - image: letta/letta:latest - ports: - - "8283:8283" - environment: - - OPENAI_API_KEY=your_openai_key - - EXA_API_KEY=your_exa_api_key - volumes: - - ~/.letta/.persist/pgdata:/var/lib/postgresql/data -``` - -```bash Python Server -export EXA_API_KEY="your_exa_api_key" -letta server -``` - -```python Per-Agent Configuration -agent = client.agents.create( - model="openai/gpt-4o", - embedding="openai/text-embedding-3-small", - tools=["web_search"], - tool_env_vars={ - "EXA_API_KEY": "your_exa_api_key" - } -) -``` - - -## Tool Parameters - -The `web_search` tool supports advanced filtering and search customization: - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `query` | `str` | Required | The search query to find relevant web content | -| `num_results` | `int` | 10 | Number of results to return (1-100) | -| `category` | `str` | None | Focus search on specific content types (see below) | -| `include_text` | `bool` | False | Whether to retrieve full page content (usually overflows context) | -| `include_domains` | `List[str]` | None | List of domains to include in search results | -| `exclude_domains` | `List[str]` | None | List of domains to exclude from search results | -| `start_published_date` | `str` | None | Only return content published after this date (ISO format) | -| `end_published_date` | `str` | None | Only return content published before this date (ISO format) | -| `user_location` | `str` | None | Two-letter country code for localized results (e.g., "US") | - -### 
Available Categories - -Use the `category` parameter to focus your search on specific content types: - -| Category | Best For | Example Query | -|----------|----------|---------------| -| `company` | Corporate information, company websites | "Tesla energy storage solutions" | -| `research paper` | Academic papers, arXiv, research publications | "transformer architecture improvements 2025" | -| `news` | News articles, current events | "latest AI policy developments" | -| `pdf` | PDF documents, reports, whitepapers | "climate change impact assessment" | -| `github` | GitHub repositories, open source projects | "python async web scraping libraries" | -| `tweet` | Twitter/X posts, social media discussions | "reactions to new GPT release" | -| `personal site` | Blogs, personal websites, portfolios | "machine learning tutorial blogs" | -| `linkedin profile` | LinkedIn profiles, professional bios | "AI research engineers at Google" | -| `financial report` | Earnings reports, financial statements | "Apple Q4 2024 earnings" | - -### Return Format - -The tool returns a JSON-encoded string containing: - -```json -{ - "query": "search query", - "results": [ - { - "title": "Page title", - "url": "https://example.com", - "published_date": "2025-01-15", - "author": "Author name", - "highlights": ["Key excerpt 1", "Key excerpt 2"], - "summary": "AI-generated summary of the content", - "text": "Full page content (only if include_text=true)" - } - ] -} -``` - -## Best Practices - -### 1. Guide When to Search - -Provide clear instructions to your agent about when web search is appropriate: - -```python -memory_blocks=[ - { - "label": "persona", - "value": "I'm a helpful assistant. I use web_search for current events, recent news, and topics requiring up-to-date information. I cite my sources." - } -] -``` - -### 2. 
Combine with Archival Memory - -Use web search for external/current information, and archival memory for your organization's internal data: - -```python -# Create agent with both web_search and archival memory tools -agent = client.agents.create( - model="openai/gpt-4o", - embedding="openai/text-embedding-3-small", - tools=["web_search", "archival_memory_search", "archival_memory_insert"], - memory_blocks=[ - { - "label": "persona", - "value": "I use web_search for current events and external research. I use archival_memory_search for company-specific information and internal documents." - } - ] -) -``` - -See the [Archival Memory documentation](/guides/agents/archival-memory-overview) for more information. - -### 3. Craft Effective Search Queries - -Exa uses neural search that understands semantic meaning. Your agent will generally form good queries naturally, but you can improve results by guiding it to: - -- **Be descriptive and specific**: "Latest research on RLHF techniques for language models" is better than "RLHF research" -- **Focus on topics, not keywords**: "How companies are deploying AI agents in customer service" works better than "AI agents customer service deployment" -- **Use natural language**: The search engine understands conversational queries like "What are the environmental impacts of Bitcoin mining?" -- **Specify time ranges when relevant**: Guide your agent to use date filters for time-sensitive queries - -Example instruction in memory: - -```python -memory_blocks=[ - { - "label": "search_strategy", - "value": "When searching, I craft clear, descriptive queries that focus on topics rather than keywords. I use the category and date filters when appropriate to narrow results." - } -] -``` - -### 4. Manage Context Window - -By default, `include_text` is `False` to avoid context overflow. 
The tool returns highlights and AI-generated summaries instead, which are more concise: - -```python -memory_blocks=[ - { - "label": "search_guidelines", - "value": "I avoid setting include_text=true unless specifically needed, as full text usually overflows the context window. Highlights and summaries are usually sufficient." - } -] -``` - -## Common Patterns - -### Research Assistant - -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["web_search"], - memory_blocks=[ - { - "label": "persona", - "value": "I'm a research assistant. I search for relevant information, synthesize findings from multiple sources, and provide citations." - } - ] -) -``` - -### News Monitor - -```python -agent = client.agents.create( - model="openai/gpt-4o-mini", - tools=["web_search"], - memory_blocks=[ - { - "label": "persona", - "value": "I monitor news and provide briefings on AI industry developments." - }, - { - "label": "topics", - "value": "Focus: AI/ML, agent systems, LLM advancements" - } - ] -) -``` - -### Customer Support - -```python -agent = client.agents.create( - model="openai/gpt-4o", - tools=["web_search"], - memory_blocks=[ - { - "label": "persona", - "value": "I help customers by checking documentation, service status pages, and community discussions for solutions." - } - ] -) -``` - -## Troubleshooting - -### Agent Not Using Web Search - -Check: -1. Tool is attached: `"web_search"` in agent's tools list -2. Instructions are clear about when to search -3. 
Model has good tool-calling capabilities (GPT-4, Claude 3+) - -```python -# Verify tools -agent = client.agents.retrieve(agent_id=agent.id) -print([tool.name for tool in agent.tools]) -``` - -### Missing EXA_API_KEY - -If you see errors about missing API keys on self-hosted deployments: - -```bash -# Check if set -echo $EXA_API_KEY - -# Set for session -export EXA_API_KEY="your_exa_api_key" - -# Docker example -docker run -e EXA_API_KEY="your_exa_api_key" letta/letta:latest -``` - -## When to Use Web Search - -| Use Case | Tool | Why | -|----------|------|-----| -| Current events, news | `web_search` | Real-time information | -| External research | `web_search` | Broad internet access | -| Internal documents | Archival memory | Fast, static data | -| User preferences | Memory blocks | In-context, instant | -| General knowledge | Pre-trained model | No search needed | - -## Related Documentation - -- [Utilities Overview](/guides/agents/prebuilt-tools) -- [Custom Tools](/guides/agents/custom-tools) -- [Tool Variables](/guides/agents/tool-variables) -- [Archival Memory](/guides/agents/archival-memory-overview) diff --git a/fern/pages/cookbooks_simple.mdx b/fern/pages/cookbooks_simple.mdx deleted file mode 100644 index 9b2d9a6d..00000000 --- a/fern/pages/cookbooks_simple.mdx +++ /dev/null @@ -1,274 +0,0 @@ ---- -title: Examples & Tutorials -slug: cookbooks ---- - -Build powerful AI agents with persistent memory. Explore tutorials, ready-to-use templates, and community projects to get started. - - -**New to Letta?** - -- Start with our [Quickstart Guide](/quickstart) -- Take the free [DeepLearning.AI Course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/) -- Explore [Awesome Letta](https://github.com/letta-ai/awesome-letta) for more resources - - -## Getting Started Tutorials - -Step-by-step guides to learn Letta fundamentals. 
- - - -Build your first Letta agent in minutes - - -Create an agent that can answer questions about PDF documents - - -Learn how to dynamically manage agent memory - - -Share memory between multiple agents for coordination - - - -## Ready-to-Deploy Applications - -Production-ready templates you can clone and customize. - - - -Full-stack chatbot with per-user agent memory (Next.js + TypeScript) - - -Discord bot with persistent memory for each server and user - - -Create AI characters with memory that persists across conversations - - -Research agent that gathers and synthesizes information over time - - - -## Multi-Agent Systems - -Build coordinated teams of specialized agents. - - - -Connect agents to chat with each other and users simultaneously - - -Template for building relationship-aware agents for each customer - - - -## Tools & Integrations - -Connect Letta to your favorite platforms and tools. - - - -Use Letta with Vercel AI SDK v5 - - -Connect agents to 7,000+ apps - - -Integrate with n8n automation workflows - - -Deploy agents on Telegram - - -Add Letta agents to your knowledge base - - -SQL-powered data analysis agent - - - -## SDK Examples - -Learn the basics with minimal code examples. - - - -Basic TypeScript/Node.js SDK example - - -Basic Python SDK example - - - -## Community Projects - -Amazing projects built by the Letta community. 
- - - -Deploy Letta agents to an ATProto-powered multi-agent chatroom - - -IRC-style CLI for the Thought Stream - - - -## Learning Resources - - - -Free course: LLMs as Operating Systems - Building Agents with Memory - - -Understand how Letta agents work - - -Complete API documentation - - -Read about the research behind Letta - - - -## More Resources - - - -Comprehensive curated list of Letta resources, tools, and community projects - - -Get help and share your projects with the community - - diff --git a/fern/pages/deployment/railway.mdx b/fern/pages/deployment/railway.mdx deleted file mode 100644 index 419b988c..00000000 --- a/fern/pages/deployment/railway.mdx +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Deploy Letta Server on Railway -slug: guides/server/railway ---- - -[Railway](https://railway.app) is a service that allows you to easily deploy services (such as Docker containers) to the cloud. The following example uses Railway, but the same general principles around deploying the Letta Docker image on a cloud service and connecting it to the ADE) are generally applicable to other cloud services beyond Railway. - -## Deploying the Letta Railway template - -We've prepared a Letta Railway template that has the necessary environment variables set and mounts a persistent volume for database storage. -You can access the template by clicking the "Deploy on Railway" button below: - -[![Deploy on Railway](https://railway.com/button.svg)](https://railway.app/template/jgUR1t?referralCode=kdR8zc) - - - - - - - - - - - - - -## Accessing the deployment via the ADE - -Now that the Railway deployment is active, all we need to do to access it via the ADE is add it to as a new remote Letta server. -The default password set in the template is `password`, which can be changed at the deployment stage or afterwards in the 'variables' page on the Railway deployment. 
- -Click "Add remote server", then enter the details from Railway (use the static IP address shown in the logs, and use the password set via the environment variables): - - - - -## Accessing the deployment via the Letta API - -Accessing the deployment via the [Letta API](https://docs.letta.com/api-reference) is simple, we just need to swap the base URL of the endpoint with the IP address from the Railway deployment. - -For example if the Railway IP address is `https://MYSERVER.up.railway.app` and the password is `banana`, to create an agent on the deployment, we can use the following shell command: -```sh -curl --request POST \ - --url https://MYSERVER.up.railway.app/v1/agents/ \ - --header 'X-BARE-PASSWORD: password banana' \ - --header 'Content-Type: application/json' \ - --data '{ - "memory_blocks": [ - { - "label": "human", - "value": "The human'\''s name is Bob the Builder" - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." - } - ], - "llm_config": { - "model": "gpt-4o-mini", - "model_endpoint_type": "openai", - "model_endpoint": "https://api.openai.com/v1", - "context_window": 16000 - }, - "embedding_config": { - "embedding_endpoint_type": "openai", - "embedding_endpoint": "https://api.openai.com/v1", - "embedding_model": "text-embedding-3-small", - "embedding_dim": 8191 - } -}' -``` - -This will create an agent with two memory blocks, configured to use `gpt-4o-mini` as the LLM model, and `text-embedding-3-small` as the embedding model. - -If the Letta server is not password protected, we can omit the `X-BARE-PASSWORD` header. - -That's it! Now you should be able to create and interact with agents on your remote Letta server (deployed on Railway) via the Letta ADE and API. 
👾 ☄️ - -### Adding additional environment variables - -To help you get started, when you deploy the template you have the option to fill in the example environment variables `OPENAI_API_KEY` (to connect your Letta agents to GPT models) and `ANTHROPIC_API_KEY` (to connect your Letta agents to Claude models). - -There are many more providers you can enable on the Letta server via additional environment variables (for example vLLM, Ollama, etc). For more information on available providers, see [our documentation](/guides/server/docker). - -To connect Letta to an additional API provider, you can go to your Railway deployment (after you've deployed the template), click `Variables` to see the current environment variables, then click `+ New Variable` to add a new variable. Once you've saved a new variable, you will need to restart the server for the changes to take effect. diff --git a/fern/pages/deployment/telemetry.mdx b/fern/pages/deployment/telemetry.mdx deleted file mode 100644 index 7001f3d0..00000000 --- a/fern/pages/deployment/telemetry.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Collecting Traces & Telemetry -slug: guides/server/otel ---- - - -**ClickHouse is optional** and only required for telemetry/observability features. Letta works perfectly fine without it using just PostgreSQL. You only need ClickHouse if you want to collect traces, view LLM provider requests, or analyze system performance metrics. - - -Letta uses [ClickHouse](https://clickhouse.com/) to store telemetry. ClickHouse is a database optimized for storing logs and traces. Traces can be used to view raw requests to LLM providers and also understand your agent's system performance metrics. - -## Configuring ClickHouse -You will need to have a ClickHouse DB (either running locally or with [ClickHouse Cloud](https://console.clickhouse.cloud/)) to connect to Letta. 
- -You can configure ClickHouse by passing the required enviornment variables: -```sh -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - ... - -e CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT} \ - -e CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE} \ - -e CLICKHOUSE_USERNAME=${CLICKHOUSE_USERNAME} \ - -e CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD} \ - -e LETTA_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \ - letta/letta:latest -``` - -### Finding your credentials in ClickHouse Cloud -You can find these variable inside of ClickHouse Cloud by selecting the "Connection" button in the dashboard. - - - -## Connecting to Grafana -We recommend connecting ClickHouse to Grafana to query and view traces. Grafana can be run [locally](https://grafana.com/oss/grafana/), or via [Grafana Cloud](https://grafana.com/grafana/). - - -# Other Integrations - -Letta also supports other exporters when running in a containerized environment. To request support for another exporter, please open an issue on [GitHub](https://github.com/letta-ai/letta/issues/new/choose). - -## Configuring Signoz - -You can configure Signoz by passing the required enviornment variables: -```sh -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - ... - -e SIGNOZ_ENDPOINT=${SIGNOZ_ENDPOINT} \ - -e SIGNOZ_INGESTION_KEY=${SIGNOZ_INGESTION_KEY} \ - -e LETTA_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \ - letta/letta:latest -``` diff --git a/fern/pages/evals/advanced/custom-graders.mdx b/fern/pages/evals/advanced/custom-graders.mdx deleted file mode 100644 index bce7b893..00000000 --- a/fern/pages/evals/advanced/custom-graders.mdx +++ /dev/null @@ -1,66 +0,0 @@ -# Custom Graders - -Write your own grading functions to implement custom evaluation logic. - - -Custom graders let you implement domain-specific evaluation, parse complex formats, and apply custom scoring algorithms. 
- - -## Basic Structure - -```python -from letta_evals.decorators import grader -from letta_evals.models import GradeResult, Sample - -@grader -def my_custom_grader(sample: Sample, submission: str) -> GradeResult: - """Custom grading logic.""" - - # Your evaluation logic - score = calculate_score(submission, sample.ground_truth) - - # Ensure score is between 0.0 and 1.0 - score = max(0.0, min(1.0, score)) - - return GradeResult( - score=score, - rationale=f"Score based on custom logic: {score}" - ) -``` - -## Example: JSON Validation - -```python -import json -from letta_evals.decorators import grader -from letta_evals.models import GradeResult, Sample - -@grader -def valid_json(sample: Sample, submission: str) -> GradeResult: - """Check if submission is valid JSON.""" - try: - json.loads(submission) - return GradeResult(score=1.0, rationale="Valid JSON") - except json.JSONDecodeError as e: - return GradeResult(score=0.0, rationale=f"Invalid JSON: {e}") -``` - -## Registration - -Custom graders are automatically registered when you import them in your suite's setup script or custom evaluators file. - -## Configuration - -```yaml -graders: - my_metric: - kind: tool - function: my_custom_grader # Your function name - extractor: last_assistant -``` - -## Next Steps - -- [Tool Graders](/evals/graders/tool-graders) - Built-in grading functions -- [Graders Concept](/evals/core-concepts/graders) - Understanding graders -- [Example Custom Graders](https://github.com/letta-ai/letta-evals/tree/main/examples) - See examples in the letta-evals repo diff --git a/fern/pages/evals/advanced/multi-turn-conversations.mdx b/fern/pages/evals/advanced/multi-turn-conversations.mdx deleted file mode 100644 index 269761ef..00000000 --- a/fern/pages/evals/advanced/multi-turn-conversations.mdx +++ /dev/null @@ -1,94 +0,0 @@ -# Multi-Turn Conversations - -Multi-turn conversations allow you to test how agents handle context across multiple exchanges. 
- - -This is essential for stateful agents where behavior depends on conversation history. - - -## Why Use Multi-Turn? - -Multi-turn conversations enable testing that single-turn prompts cannot: - -- **Memory storage**: Verify agents persist information to memory blocks -- **Tool call sequences**: Test multi-step workflows -- **Context retention**: Ensure agents remember details from earlier -- **State evolution**: Track how agent state changes across interactions -- **Conversational coherence**: Test if agents maintain context appropriately - -## Format - -### Single-Turn (Default) - -```jsonl -{"input": "What is the capital of France?", "ground_truth": "Paris"} -``` - -### Multi-Turn - -```jsonl -{"input": ["My name is Alice", "What's my name?"], "ground_truth": "Alice"} -``` - -The agent processes each input in sequence, with state carrying over between turns. - -## Example 1: Memory Recall Testing - -Test if the agent remembers information across turns: - -```jsonl -{"input": ["Remember that my favorite color is blue", "What's my favorite color?"], "ground_truth": "blue"} -``` - -Suite configuration: -```yaml -graders: - response_check: - kind: tool - function: contains - extractor: last_assistant # Check the agent's response -``` - -## Example 2: Memory Correction Testing - -Test if the agent correctly updates memory when users correct themselves: - -```jsonl -{"input": ["Please remember that I like bananas.", "Actually, sorry, I meant I like apples."], "ground_truth": "apples"} -``` - -Suite configuration: -```yaml -graders: - memory_check: - kind: tool - function: contains - extractor: memory_block - extractor_config: - block_label: human # Check the actual memory block, not just the response -``` - - -**Key difference:** The `memory_block` extractor verifies the agent actually stored the corrected information in memory, not just that it responded correctly. This tests real memory persistence. - - -## When to Test Memory Blocks vs. 
Responses - -**Use `last_assistant` or `all_assistant` extractors when:** -- Testing what the agent says in conversation -- Verifying response content and phrasing -- Checking conversational coherence - -**Use `memory_block` extractor when:** -- Verifying information was actually stored in memory -- Testing memory updates and corrections -- Validating persistent state changes -- Ensuring the agent's internal state is correct - -See the [multiturn-memory-block-extractor example](https://github.com/letta-ai/letta-evals/tree/main/examples/multiturn-memory-block-extractor) for a complete working implementation. - -## Next Steps - -- [Datasets](/evals/core-concepts/datasets) - Creating test datasets -- [Extractors](/evals/core-concepts/extractors) - Extracting from trajectories -- [Targets](/evals/core-concepts/targets) - Agent lifecycle and testing behavior diff --git a/fern/pages/evals/cli/commands.mdx b/fern/pages/evals/cli/commands.mdx deleted file mode 100644 index 0b8eee31..00000000 --- a/fern/pages/evals/cli/commands.mdx +++ /dev/null @@ -1,342 +0,0 @@ -# CLI Commands - -The **letta-evals** command-line interface lets you run evaluations, validate configurations, and inspect available components. - - -**Quick overview:** -- **`run`** - Execute an evaluation suite (most common) -- **`validate`** - Check suite configuration without running -- **`list-extractors`** - Show available extractors -- **`list-graders`** - Show available grader functions -- **Exit codes** - 0 for pass, 1 for fail (perfect for CI/CD) - - -**Typical workflow:** -1. Validate your suite: `letta-evals validate suite.yaml` -2. Run evaluation: `letta-evals run suite.yaml --output results/` -3. Check exit code: `echo $?` (0 = passed, 1 = failed) - -## run - -Run an evaluation suite. - -```bash -letta-evals run [options] -``` - -### Arguments - -- `suite.yaml`: Path to the suite configuration file (required) - -### Options - -#### --output, -o -Save results to a directory. 
- -```bash -letta-evals run suite.yaml --output results/ -``` - -Creates: -- `results/header.json`: Evaluation metadata -- `results/summary.json`: Aggregate metrics and configuration -- `results/results.jsonl`: Per-sample results (one JSON per line) - -#### --quiet, -q -Quiet mode - only show pass/fail result. - -```bash -letta-evals run suite.yaml --quiet -``` - -Output: -``` -✓ PASSED -``` - -#### --max-concurrent -Maximum concurrent sample evaluations. **Default**: 15 - -```bash -letta-evals run suite.yaml --max-concurrent 10 -``` - -Higher values = faster evaluation but more resource usage. - -#### --api-key -Letta API key (overrides LETTA_API_KEY environment variable). - -```bash -letta-evals run suite.yaml --api-key your-key -``` - -#### --base-url -Letta server base URL (overrides suite config and environment variable). - -```bash -letta-evals run suite.yaml --base-url http://localhost:8283 -``` - -#### --project-id -Letta project ID for cloud deployments. - -```bash -letta-evals run suite.yaml --project-id proj_abc123 -``` - -#### --cached, -c -Path to cached results (JSONL) for re-grading trajectories without re-running the agent. - -```bash -letta-evals run suite.yaml --cached previous_results.jsonl -``` - -Use this to test different graders on the same agent trajectories. - -#### --num-runs -Run the evaluation multiple times to measure consistency. 
**Default**: 1 - -```bash -letta-evals run suite.yaml --num-runs 10 -``` - -**Output with multiple runs:** -- Each run creates a separate `run_N/` directory with individual results -- An `aggregate_stats.json` file contains statistics across all runs (mean, standard deviation, pass rate) - -### Examples - -Basic run: -```bash -letta-evals run suite.yaml # Run evaluation, show results in terminal -``` - -Save results: -```bash -letta-evals run suite.yaml --output evaluation-results/ # Save to directory -``` - -Letta Cloud: -```bash -letta-evals run suite.yaml \ - --base-url https://api.letta.com \ - --api-key $LETTA_API_KEY \ - --project-id proj_abc123 -``` - -Quiet CI mode: -```bash -letta-evals run suite.yaml --quiet -if [ $? -eq 0 ]; then - echo "Evaluation passed" -else - echo "Evaluation failed" - exit 1 -fi -``` - -### Exit Codes - -- `0`: Evaluation passed (gate criteria met) -- `1`: Evaluation failed (gate criteria not met or error) - -## validate - -Validate a suite configuration without running it. - -```bash -letta-evals validate suite.yaml -``` - -Checks: -- YAML syntax is valid -- Required fields are present -- Paths exist -- Configuration is consistent -- Grader/extractor combinations are valid - -Output on success: -``` -✓ Suite configuration is valid -``` - -Output on error: -``` -✗ Validation failed: - - Agent file not found: agent.af - - Grader 'my_metric' references unknown function -``` - -## list-extractors - -List all available extractors.
- -```bash -letta-evals list-extractors -``` - -Output: -``` -Available extractors: - last_assistant - Extract the last assistant message - first_assistant - Extract the first assistant message - all_assistant - Concatenate all assistant messages - pattern - Extract content matching regex - tool_arguments - Extract tool call arguments - tool_output - Extract tool return value - after_marker - Extract content after a marker - memory_block - Extract from memory block (requires agent_state) -``` - -## list-graders - -List all available grader functions. - -```bash -letta-evals list-graders -``` - -Output: -``` -Available graders: - exact_match - Exact string match with ground_truth - contains - Check if contains ground_truth - regex_match - Match regex pattern - ascii_printable_only - Validate ASCII-only content -``` - -## help - -Show help information. - -```bash -letta-evals --help -``` - -Show help for a specific command: - -```bash -letta-evals run --help -letta-evals validate --help -``` - -## Environment Variables - -### LETTA_API_KEY -API key for Letta authentication. - -```bash -export LETTA_API_KEY=your-key-here -``` - -### LETTA_BASE_URL -Letta server base URL. - -```bash -export LETTA_BASE_URL=http://localhost:8283 -``` - -### LETTA_PROJECT_ID -Letta project ID (for cloud). - -```bash -export LETTA_PROJECT_ID=proj_abc123 -``` - -### OPENAI_API_KEY -OpenAI API key (for rubric graders). - -```bash -export OPENAI_API_KEY=your-openai-key -``` - -## Configuration Priority - -Configuration values are resolved in this order (highest to lowest priority): - -1. CLI arguments (`--api-key`, `--base-url`, `--project-id`) -2. Suite YAML configuration -3. 
Environment variables - -## Using in CI/CD - -### GitHub Actions - -```yaml -name: Run Evals -on: [push] - -jobs: - evaluate: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Install dependencies - run: pip install letta-evals - - - name: Run evaluation - env: - LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - letta-evals run suite.yaml --quiet --output results/ - - - name: Upload results - uses: actions/upload-artifact@v2 - with: - name: eval-results - path: results/ -``` - -### GitLab CI - -```yaml -evaluate: - script: - - pip install letta-evals - - letta-evals run suite.yaml --quiet --output results/ - artifacts: - paths: - - results/ - variables: - LETTA_API_KEY: $LETTA_API_KEY - OPENAI_API_KEY: $OPENAI_API_KEY -``` - -## Debugging - -### Common Issues - - -**"Agent file not found"** - -```bash -# Check file exists relative to suite YAML location -ls -la path/to/agent.af -``` - - - -**"Connection refused"** - -```bash -# Verify Letta server is running -curl http://localhost:8283/v1/health -``` - - - -**"Invalid API key"** - -```bash -# Check environment variable is set -echo $LETTA_API_KEY -``` - - -## Next Steps - -- [Understanding Results](/evals/results-metrics/understanding-results) - Interpreting evaluation output -- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration options -- [Getting Started](/evals/get-started/getting-started) - Complete tutorial with examples diff --git a/fern/pages/evals/concepts/datasets.mdx b/fern/pages/evals/concepts/datasets.mdx deleted file mode 100644 index 70c03183..00000000 --- a/fern/pages/evals/concepts/datasets.mdx +++ /dev/null @@ -1,425 +0,0 @@ -# Datasets - -**Datasets** are the test cases that define what your agent will be evaluated on. Each sample in your dataset represents one evaluation scenario. 
- - -**Quick overview:** -- **Two formats**: JSONL (flexible, powerful) or CSV (simple, spreadsheet-friendly) -- **Required field**: `input` - the prompt(s) to send to the agent -- **Common fields**: `ground_truth` (expected answer), `tags` (for filtering), `metadata` (extra info) -- **Advanced fields**: `agent_args` (customize agent per sample), `rubric_vars` (per-sample rubric context) -- **Multi-turn support**: Send multiple messages in sequence using arrays - - -**Typical workflow:** -1. Create a JSONL or CSV file with test cases -2. Reference it in your suite YAML: `dataset: test_cases.jsonl` -3. Run evaluation - each sample is tested independently -4. Results show per-sample and aggregate scores - -Datasets can be created in two formats: **JSONL** or **CSV**. Choose based on your team's workflow and complexity needs. - -## Dataset Formats - -### JSONL Format - -Each line is a JSON object representing one test case: - -```jsonl -{"input": "What's the capital of France?", "ground_truth": "Paris"} -{"input": "Calculate 2+2", "ground_truth": "4"} -{"input": "What color is the sky?", "ground_truth": "blue"} -``` - -**Best for:** -- Complex data structures (nested objects, arrays) -- Multi-turn conversations -- Advanced features (agent_args, rubric_vars) -- Teams comfortable with JSON/code -- Version control (clean line-by-line diffs) - -### CSV Format - -Standard CSV with headers: - -```csv -input,ground_truth -"What's the capital of France?","Paris" -"Calculate 2+2","4" -"What color is the sky?","blue" -``` - -**Best for:** -- Simple question-answer pairs -- Teams that prefer spreadsheets (Excel, Google Sheets) -- Non-technical collaborators creating test cases -- Quick dataset creation and editing -- Easy sharing with non-developers - -## Quick Reference - -| Field | Required | Type | Purpose | -|-------|----------|------|---------| -| `input` | ✅ | string or array | Prompt(s) to send to agent | -| `ground_truth` | ❌ | string | Expected answer (for tool graders) 
| -| `tags` | ❌ | array of strings | For filtering samples | -| `agent_args` | ❌ | object | Per-sample agent customization | -| `rubric_vars` | ❌ | object | Per-sample rubric variables | -| `metadata` | ❌ | object | Arbitrary extra data | -| `id` | ❌ | integer | Sample ID (auto-assigned if omitted) | - -## Field Reference - -### Required Fields - -#### input -The prompt(s) to send to the agent. Can be a string or array of strings: - -Single message: -```json -{"input": "Hello, who are you?"} -``` - -Multi-turn conversation: -```json -{"input": ["Hello", "What's your name?", "Tell me about yourself"]} -``` - -### Optional Fields - -#### ground_truth -The expected answer or content to check against. Required for most tool graders (exact_match, contains, etc.): - -```json -{"input": "What is 2+2?", "ground_truth": "4"} -``` - -#### metadata -Arbitrary additional data about the sample: - -```json -{ - "input": "What is photosynthesis?", - "ground_truth": "process where plants convert light into energy", - "metadata": { - "category": "biology", - "difficulty": "medium" - } -} -``` - -#### tags -List of tags for filtering samples: - -```json -{"input": "Solve x^2 = 16", "ground_truth": "4", "tags": ["math", "algebra"]} -``` - -Filter by tags in your suite: -```yaml -sample_tags: [math] # Only samples tagged "math" will be evaluated -``` - -#### agent_args - -Custom arguments passed to programmatic agent creation when using `agent_script`. Allows per-sample agent customization. - -JSONL: -```json -{ - "input": "What items do we have?", - "agent_args": { - "item": {"sku": "SKU-123", "name": "Widget A", "price": 19.99} - } -} -``` - -CSV: -```csv -input,agent_args -"What items do we have?","{""item"": {""sku"": ""SKU-123"", ""name"": ""Widget A"", ""price"": 19.99}}" -``` - -Your agent factory function can access these values via `sample.agent_args` to customize agent configuration. 
- -See [Targets - agent_script](/evals/core-concepts/targets#agent_script) for details on programmatic agent creation. - -#### rubric_vars - -Variables to inject into rubric templates when using rubric graders. This allows you to provide per-sample context or examples to the LLM judge. - -**Example:** Evaluating code quality against a reference implementation. - -JSONL: -```jsonl -{"input": "Write a function to calculate fibonacci numbers", "rubric_vars": {"reference_code": "def fib(n):\n if n <= 1: return n\n return fib(n-1) + fib(n-2)", "required_features": "recursion, base case"}} -``` - -CSV: -```csv -input,rubric_vars -"Write a function to calculate fibonacci numbers","{""reference_code"": ""def fib(n):\n if n <= 1: return n\n return fib(n-1) + fib(n-2)"", ""required_features"": ""recursion, base case""}" -``` - -In your rubric template file, reference variables with `{variable_name}`: - -**rubric.txt:** -``` -Evaluate the submitted code against this reference implementation: - -{reference_code} - -Required features: {required_features} - -Score on correctness (0.6) and code quality (0.4). -``` - -When the rubric grader runs, variables are replaced with values from `rubric_vars`: - -**Final formatted prompt sent to LLM:** -``` -Evaluate the submitted code against this reference implementation: - -def fib(n): - if n <= 1: return n - return fib(n-1) + fib(n-2) - -Required features: recursion, base case - -Score on correctness (0.6) and code quality (0.4). -``` - -This lets you customize evaluation criteria per sample using the same rubric template. - -See [Rubric Graders](/evals/graders/rubric-graders) for details on rubric templates. - -#### id -Sample ID is automatically assigned (0-based index) if not provided. 
You can override: - -```json -{"id": 42, "input": "Test case 42"} -``` - -## Complete Example - -```jsonl -{"id": 1, "input": "What is the capital of France?", "ground_truth": "Paris", "tags": ["geography", "easy"], "metadata": {"region": "Europe"}} -{"id": 2, "input": "Calculate the square root of 144", "ground_truth": "12", "tags": ["math", "medium"]} -{"id": 3, "input": ["Hello", "What can you help me with?"], "tags": ["conversation"]} -``` - -## Dataset Best Practices - -### 1. Clear Ground Truth - -Make ground truth specific enough to grade but flexible enough to match valid responses: - - -Good: -```json -{"input": "What's the largest planet?", "ground_truth": "Jupiter"} -``` - - - -Too strict (might miss valid answers): -```json -{"input": "What's the largest planet?", "ground_truth": "Jupiter is the largest planet in our solar system."} -``` - - -### 2. Diverse Test Cases - -Include edge cases and variations: - -```jsonl -{"input": "What is 2+2?", "ground_truth": "4", "tags": ["math", "easy"]} -{"input": "What is 0.1 + 0.2?", "ground_truth": "0.3", "tags": ["math", "floating_point"]} -{"input": "What is 999999999 + 1?", "ground_truth": "1000000000", "tags": ["math", "large_numbers"]} -``` - -### 3. Use Tags for Organization - -Organize samples by type, difficulty, or feature: - -```json -{"tags": ["tool_usage", "search"]} -{"tags": ["memory", "recall"]} -{"tags": ["reasoning", "multi_step"]} -``` - -### 4. 
Multi-Turn Conversations - -Test conversational context and memory updates: - -```jsonl -{"input": ["My name is Alice", "What's my name?"], "ground_truth": "Alice", "tags": ["memory", "recall"]} -{"input": ["Please remember that I like bananas.", "Actually, sorry, I meant I like apples."], "ground_truth": "apples", "tags": ["memory", "correction"]} -{"input": ["I work at Google", "Update my workplace to Microsoft", "Where do I work?"], "ground_truth": "Microsoft", "tags": ["memory", "multi_step"]} -``` - - -**Testing memory corrections:** Use multi-turn inputs to test if agents properly update memory when users correct themselves. Combine with the `memory_block` extractor to verify the final memory state, not just the response. - - -### 5. No Ground Truth for LLM Judges - -If using rubric graders, ground truth is optional: - -```jsonl -{"input": "Write a creative story about a robot", "tags": ["creative"]} -{"input": "Explain quantum computing simply", "tags": ["explanation"]} -``` - -The LLM judge evaluates based on the rubric, not ground truth. - -## Loading Datasets - -Datasets are automatically loaded by the runner: - -```yaml -dataset: path/to/dataset.jsonl # Path to your test cases (JSONL or CSV) -``` - -Paths are relative to the suite YAML file location. 
- -## Dataset Filtering - -### Limit Sample Count - -```yaml -max_samples: 10 # Only evaluate first 10 samples (useful for testing) -``` - -### Filter by Tags - -```yaml -sample_tags: [math, medium] # Only samples with ALL these tags -``` - -## Creating Datasets Programmatically - -You can generate datasets with Python: - -```python -import json - -samples = [] -for i in range(100): - samples.append({ - "input": f"What is {i} + {i}?", - "ground_truth": str(i + i), - "tags": ["math", "addition"] - }) - -with open("dataset.jsonl", "w") as f: - for sample in samples: - f.write(json.dumps(sample) + "\n") -``` - -## Dataset Format Validation - -The runner validates: -- Each line is valid JSON -- Required fields are present -- Field types are correct - -Validation errors will be reported with line numbers. - -## Examples by Use Case - -### Question Answering - -JSONL: -```jsonl -{"input": "What is the capital of France?", "ground_truth": "Paris"} -{"input": "Who wrote Romeo and Juliet?", "ground_truth": "Shakespeare"} -``` - -CSV: -```csv -input,ground_truth -"What is the capital of France?","Paris" -"Who wrote Romeo and Juliet?","Shakespeare" -``` - -### Tool Usage Testing - -JSONL: -```jsonl -{"input": "Search for information about pandas", "ground_truth": "search"} -{"input": "Calculate 15 * 23", "ground_truth": "calculator"} -``` - -CSV: -```csv -input,ground_truth -"Search for information about pandas","search" -"Calculate 15 * 23","calculator" -``` - -Ground truth = expected tool name. 
- -### Memory Testing (Multi-turn) - -JSONL: -```jsonl -{"input": ["Remember that my favorite color is blue", "What's my favorite color?"], "ground_truth": "blue"} -{"input": ["I live in Tokyo", "Where do I live?"], "ground_truth": "Tokyo"} -``` - -CSV (using JSON array strings): -```csv -input,ground_truth -"[""Remember that my favorite color is blue"", ""What's my favorite color?""]","blue" -"[""I live in Tokyo"", ""Where do I live?""]","Tokyo" -``` - -### Code Generation - -JSONL: -```jsonl -{"input": "Write a function to reverse a string in Python"} -{"input": "Create a SQL query to find users older than 21"} -``` - -CSV: -```csv -input -"Write a function to reverse a string in Python" -"Create a SQL query to find users older than 21" -``` - -Use rubric graders to evaluate code quality. - -## CSV Advanced Features - -CSV supports all the same features as JSONL by encoding complex data as JSON strings in cells: - -**Multi-turn conversations** (requires escaped JSON array string): -```csv -input,ground_truth -"[""Hello"", ""What's your name?""]","Alice" -``` - -**Agent arguments** (requires escaped JSON object string): -```csv -input,agent_args -"What items do we have?","{""initial_inventory"": [""apple"", ""banana""]}" -``` - -**Rubric variables** (requires escaped JSON object string): -```csv -input,rubric_vars -"Write a story","{""max_length"": 500, ""genre"": ""sci-fi""}" -``` - - -**Note:** Complex data structures require JSON encoding in CSV. If you're frequently using these advanced features, JSONL may be easier to read and maintain. 
- - -## Next Steps - -- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration options including filtering -- [Graders](/evals/core-concepts/graders) - How to evaluate agent responses -- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Testing conversational flows diff --git a/fern/pages/evals/concepts/extractors.mdx b/fern/pages/evals/concepts/extractors.mdx deleted file mode 100644 index 247e7904..00000000 --- a/fern/pages/evals/concepts/extractors.mdx +++ /dev/null @@ -1,374 +0,0 @@ -# Extractors - -**Extractors** select what content to evaluate from an agent's response. They navigate the conversation trajectory and extract the specific piece you want to grade. - - -**Quick overview:** -- **Purpose**: Agent responses are complex (messages, tool calls, memory) - extractors isolate what to grade -- **Built-in options**: last_assistant, tool_arguments, memory_block, pattern, and more -- **Flexible**: Different graders can use different extractors in the same suite -- **Automatic**: No setup needed - just specify in your grader config - - -**Common patterns:** -- `last_assistant` - Most common, gets the agent's final message (90% of use cases) -- `tool_arguments` - Verify agent called the right tool with correct args -- `memory_block` - Check if agent updated memory correctly -- `pattern` - Extract structured data with regex - -Extractors determine what part of the agent's response gets graded. They pull out specific content from the conversation trajectory. - -## Why Extractors? - -An agent's response is complex - it includes assistant messages, tool calls, tool returns, memory updates, etc. Extractors let you focus on exactly what you want to evaluate. - -**The evaluation flow:** -``` -Agent Response → Extractor → Submission Text → Grader → Score -``` - -For example: -``` -Full trajectory: - UserMessage: "What's the capital of France?" 
- ToolCallMessage: search(query="capital of france") - ToolReturnMessage: "Paris is the capital..." - AssistantMessage: "The capital of France is Paris." - -↓ extractor: last_assistant ↓ - -Extracted: "The capital of France is Paris." - -↓ grader: contains (ground_truth="Paris") ↓ - -Score: 1.0 -``` - -## Trajectory Structure - -A trajectory is a list of turns, where each turn is a list of Letta messages: - -```python -[ - [UserMessage(...), AssistantMessage(...), ToolCallMessage(...), ToolReturnMessage(...)], # Turn 1 - [AssistantMessage(...)] # Turn 2 -] -``` - -Extractors navigate this structure to pull out the submission text. - -## Built-in Extractors - -### last_assistant - -Extracts the last assistant message content. - -```yaml -graders: - quality: - kind: tool - function: contains - extractor: last_assistant # Extract final agent message -``` - -Most common extractor - gets the agent's final response. - -### first_assistant - -Extracts the first assistant message content. - -```yaml -graders: - initial_response: - kind: tool - function: contains - extractor: first_assistant # Extract first agent message -``` - -Useful for testing immediate responses before tool usage. - -### all_assistant - -Concatenates all assistant messages with a separator. - -```yaml -graders: - complete_response: - kind: rubric - prompt_path: rubric.txt - extractor: all_assistant # Concatenate all agent messages - extractor_config: - separator: "\n\n" # Join messages with double newline -``` - -Use when you need the full conversation context. - -### last_turn - -Extracts all assistant messages from the last turn only. - -```yaml -graders: - final_turn: - kind: tool - function: contains - extractor: last_turn # Messages from final turn only - extractor_config: - separator: " " # Join with spaces -``` - -Useful when the agent makes multiple statements in the final turn. - -### pattern - -Extracts content matching a regex pattern from assistant messages. 
- -```yaml -graders: - extract_number: - kind: tool - function: exact_match - extractor: pattern # Extract using regex - extractor_config: - pattern: 'Result: (\d+)' # Regex pattern to match - group: 1 # Extract capture group 1 - search_all: false # Only find first match -``` - -Example: Extract "42" from "The answer is Result: 42" - -### tool_arguments - -Extracts arguments from a specific tool call. - -```yaml -graders: - search_query: - kind: tool - function: contains - extractor: tool_arguments # Extract tool call arguments - extractor_config: - tool_name: search # Which tool to extract from -``` - -Returns the JSON arguments as a string. - -Example: If agent calls `search(query="pandas", limit=10)`, extracts: -```json -{"query": "pandas", "limit": 10} -``` - -### tool_output - -Extracts the return value from a specific tool call. - -```yaml -graders: - search_results: - kind: tool - function: contains - extractor: tool_output # Extract tool return value - extractor_config: - tool_name: search # Which tool's output to extract -``` - -Finds the tool call and its corresponding return message. - -### after_marker - -Extracts content after a specific marker string. - -```yaml -graders: - answer_section: - kind: tool - function: contains - extractor: after_marker # Extract content after marker - extractor_config: - marker: "ANSWER:" # Marker string to find - include_marker: false # Don't include "ANSWER:" in output -``` - -Example: From "Here's my analysis... ANSWER: Paris", extracts "Paris" - -### memory_block - -Extracts content from a specific memory block (requires agent_state). - -```yaml -graders: - human_memory: - kind: tool - function: exact_match - extractor: memory_block # Extract from agent memory - extractor_config: - block_label: human # Which memory block to extract -``` - - -**Important**: This extractor requires the agent's final state, which adds overhead. The runner automatically fetches agent_state when this extractor is used. 
- - -Example use case: Verify the agent correctly updated its memory about the user. - -## Extractor Configuration - -Some extractors accept additional configuration via `extractor_config`: - -```yaml -graders: - my_metric: - kind: tool - function: contains - extractor: pattern # Use pattern extractor - extractor_config: # Configuration for this extractor - pattern: 'Answer: (.*)' # Regex pattern - group: 1 # Extract capture group 1 -``` - -## Choosing an Extractor - -| Use Case | Recommended Extractor | -|----------|---------------------| -| Final agent response | `last_assistant` | -| First response before tools | `first_assistant` | -| Complete conversation | `all_assistant` | -| Specific format extraction | `pattern` | -| Tool usage validation | `tool_arguments` | -| Tool result checking | `tool_output` | -| Memory validation | `memory_block` | -| Structured output | `after_marker` | - -## Content Flattening - -Assistant messages can contain multiple content parts. Extractors automatically flatten complex content to plain text. - -## Empty Extraction - -If an extractor finds no matching content, it returns an empty string `""`. This typically results in a score of 0.0 from the grader. - -## Custom Extractors - -You can write custom extractors. See [Custom Extractors](/evals/extractors/custom-extractors) for details. - -Example: - -```python -from letta_evals.decorators import extractor -from letta_client import LettaMessageUnion - -@extractor -def my_extractor(trajectory: List[List[LettaMessageUnion]], config: dict) -> str: - # Custom extraction logic - return extracted_text -``` - -Register by importing in your suite's setup script or custom evaluators file. 
- -## Multi-Metric Extraction - -Different graders can use different extractors: - -```yaml -graders: - response_quality: # Evaluate final message quality - kind: rubric - prompt_path: quality.txt - extractor: last_assistant # Extract final response - - tool_usage: # Check tool was called correctly - kind: tool - function: exact_match - extractor: tool_arguments # Extract tool args - extractor_config: - tool_name: search # From search tool - - memory_update: # Verify memory updated - kind: tool - function: contains - extractor: memory_block # Extract from memory - extractor_config: - block_label: human # Human memory block -``` - -Each grader independently extracts and evaluates different aspects. - -## Listing Extractors - -See all available extractors: - -```bash -letta-evals list-extractors -``` - -## Examples - -### Extract Final Answer - -```yaml -extractor: last_assistant # Get final agent message -``` - -Agent: "Let me search... *uses tool* ... The answer is Paris." -Extracted: "The answer is Paris." - -### Extract Tool Arguments - -```yaml -extractor: tool_arguments # Get tool call args -extractor_config: - tool_name: search # From search tool -``` - -Agent calls: `search(query="pandas", limit=5)` -Extracted: `{"query": "pandas", "limit": 5}` - -### Extract Pattern - -```yaml -extractor: pattern # Extract with regex -extractor_config: - pattern: 'RESULT: (\w+)' # Match pattern - group: 1 # Extract capture group 1 -``` - -Agent: "After calculation... RESULT: SUCCESS" -Extracted: "SUCCESS" - -### Extract Memory - -```yaml -extractor: memory_block # Extract from agent memory -extractor_config: - block_label: human # Human memory block -``` - -Agent updates memory block "human" to: "User's name is Alice" -Extracted: "User's name is Alice" - -## Troubleshooting - - -**Extractor returns empty string** - -**Problem**: Grader always gives score 0.0 because extractor finds nothing. 
- -**Common causes**: -- **Wrong extractor**: Using `first_assistant` but agent doesn't respond until after tool use → use `last_assistant` -- **Wrong tool name**: `tool_arguments` with `tool_name: "search"` but agent calls `"web_search"` → check actual tool name -- **Wrong memory block**: `memory_block` with `block_label: "user"` but block is actually labeled `"human"` → check block labels -- **Pattern doesn't match**: `pattern: "Answer: (.*)"` but agent says "The answer is..." → adjust regex - - - -**Debug tips**: -1. Check the trajectory in results JSON to see actual agent output -2. Use `last_assistant` first to see what's there -3. Verify tool names with `letta-evals list-extractors` - - -## Next Steps - -- [Built-in Extractors Reference](/evals/extractors/built-in-extractors) - Complete extractor documentation -- [Custom Extractors Guide](/evals/extractors/custom-extractors) - Write your own extractors -- [Graders](/evals/core-concepts/graders) - How to use extractors with graders diff --git a/fern/pages/evals/concepts/gates.mdx b/fern/pages/evals/concepts/gates.mdx deleted file mode 100644 index 29d5f31d..00000000 --- a/fern/pages/evals/concepts/gates.mdx +++ /dev/null @@ -1,384 +0,0 @@ -# Gates - -**Gates** are the pass/fail criteria for your evaluation. They determine whether your agent meets the required performance threshold by checking aggregate metrics. 
- - -**Quick overview:** -- **Single decision**: One gate per suite determines pass/fail -- **Two metrics**: `avg_score` (average of all scores) or `accuracy` (percentage passing threshold) -- **Flexible operators**: `>=`, `>`, `<=`, `<`, `==` for threshold comparison -- **Customizable pass criteria**: Define what counts as "passing" for accuracy calculations -- **Exit codes**: Suite exits 0 for pass, 1 for fail - - -**Common patterns:** -- Average score must be 80%+: `avg_score >= 0.8` -- 90%+ of samples must pass: `accuracy >= 0.9` -- Custom threshold: Define per-sample pass criteria with `pass_value` - -Gates define the pass/fail criteria for your evaluation. They check if aggregate metrics meet a threshold. - -## Basic Structure - -```yaml -gate: - metric_key: accuracy # Which grader to evaluate - metric: avg_score # Use average score (default) - op: gte # Greater than or equal - value: 0.8 # 80% threshold -``` - -## Why Use Gates? - -Gates provide **automated pass/fail decisions** for your evaluations, which is essential for: - -**CI/CD Integration**: Gates let you block deployments if agent performance drops: -```bash -letta-evals run suite.yaml -# Exit code 0 = pass (continue deployment) -# Exit code 1 = fail (block deployment) -``` - -**Regression Testing**: Set a baseline threshold and ensure new changes don't degrade performance: -```yaml -gate: - metric: avg_score - op: gte - value: 0.85 # Must maintain 85%+ to pass -``` - -**Quality Enforcement**: Require agents meet minimum standards before production: -```yaml -gate: - metric: accuracy - op: gte - value: 0.95 # 95% of test cases must pass -``` - -### What Happens When Gates Fail? - -When a gate condition is not met: - -1. **Console output** shows failure message: - ```text - ✗ FAILED (0.72/1.00 avg, 72.0% pass rate) - Gate check failed: avg_score (0.72) not >= 0.80 - ``` - -2. **Exit code** is 1 (non-zero indicates failure): - ```bash - letta-evals run suite.yaml - echo $? 
# Prints 1 if gate failed - ``` - -3. **Results JSON** includes `gate_passed: false`: - ```json - { - "gate_passed": false, - "gate_check": { - "metric": "avg_score", - "value": 0.72, - "threshold": 0.80, - "operator": "gte", - "passed": false - }, - "metrics": { ... } - } - ``` - -4. **All other data is preserved** - you still get full results, scores, and trajectories even when gating fails - - -**Common use case in CI**: - -```bash -#!/bin/bash -letta-evals run suite.yaml --output results.json - -if [ $? -ne 0 ]; then - echo "❌ Agent evaluation failed - blocking merge" - exit 1 -else - echo "✅ Agent evaluation passed - safe to merge" -fi -``` - - -## Required Fields - -### metric_key - -Which grader to evaluate. Must match a key in your `graders` section: - -```yaml -graders: - accuracy: # Grader name - kind: tool - function: exact_match - extractor: last_assistant - -gate: - metric_key: accuracy # Must match grader name above - op: gte # >= - value: 0.8 # 80% threshold -``` - -If you only have one grader, `metric_key` can be omitted - it will default to your single grader. - -### metric - -Which aggregate statistic to compare. Two options: - -#### avg_score - -Average score across all samples (0.0 to 1.0): - -```yaml -gate: - metric_key: quality # Check quality grader - metric: avg_score # Use average of all scores - op: gte # >= - value: 0.7 # Must average 70%+ -``` - -Example: If scores are [0.8, 0.9, 0.6], avg_score = 0.77 - -#### accuracy - -Pass rate as a percentage (0.0 to 1.0): - -```yaml -gate: - metric_key: accuracy # Check accuracy grader - metric: accuracy # Use pass rate, not average - op: gte # >= - value: 0.8 # 80% of samples must pass -``` - -By default, samples with score `>= 1.0` are considered "passing". - -You can customize the per-sample threshold with `pass_op` and `pass_value` (see below). 
- - -**Note**: The default `metric` is `avg_score`, so you can omit it if that's what you want: - -```yaml -gate: - metric_key: quality # Check quality grader - op: gte # >= - value: 0.7 # 70% threshold (defaults to avg_score) -``` - - -### op - -Comparison operator: - -- `gte`: Greater than or equal (`>=`) -- `gt`: Greater than (`>`) -- `lte`: Less than or equal (`<=`) -- `lt`: Less than (`<`) -- `eq`: Equal (`==`) - -Most common: `gte` (at least X) - -### value - -Threshold value for comparison: - -- For `avg_score`: 0.0 to 1.0 -- For `accuracy`: 0.0 to 1.0 (representing percentage) - -```yaml -gate: - metric: avg_score # Average score - op: gte # >= - value: 0.75 # 75% threshold -``` - -```yaml -gate: - metric: accuracy # Pass rate - op: gte # >= - value: 0.9 # 90% must pass -``` - -## Optional Fields - -### pass_op and pass_value - -Customize when individual samples are considered "passing" (used for accuracy calculation): - -```yaml -gate: - metric_key: quality # Check quality grader - metric: accuracy # Use pass rate - op: gte # >= - value: 0.8 # 80% must pass - pass_op: gte # Sample passes if >= - pass_value: 0.7 # This threshold (70%) -``` - -Default behavior: -- If `metric` is `avg_score`: samples pass if score `>=` the gate value -- If `metric` is `accuracy`: samples pass if score `>= 1.0` (perfect) - -## Examples - -### Require 80% Average Score - -```yaml -gate: - metric_key: quality # Check quality grader - metric: avg_score # Use average - op: gte # >= - value: 0.8 # 80% average -``` - -Passes if the average score across all samples is `>= 0.8` - -### Require 90% Pass Rate (Perfect Scores) - -```yaml -gate: - metric_key: accuracy # Check accuracy grader - metric: accuracy # Use pass rate - op: gte # >= - value: 0.9 # 90% must pass (default: score >= 1.0 to pass) -``` - -Passes if 90% of samples have score = 1.0 - -### Require 75% Pass Rate (Score `>= 0.7`) - -```yaml -gate: - metric_key: quality # Check quality grader - metric: accuracy # Use pass 
rate - op: gte # >= - value: 0.75 # 75% must pass - pass_op: gte # Sample passes if >= - pass_value: 0.7 # 70% threshold per sample -``` - -Passes if 75% of samples have score `>= 0.7` - -### Maximum Error Rate - -```yaml -gate: - metric_key: quality # Check quality grader - metric: accuracy # Use pass rate - op: gte # >= - value: 0.95 # 95% must pass (allows 5% failures) - pass_op: gt # Sample passes if > - pass_value: 0.0 # 0.0 (any non-zero score) -``` - -Allows up to 5% failures. - -### Exact Pass Rate - -```yaml -gate: - metric_key: quality # Check quality grader - metric: accuracy # Use pass rate - op: eq # Exactly equal - value: 1.0 # 100% (all samples must pass) -``` - -All samples must pass. - -## Multi-Metric Gating - -When you have multiple graders, you can only gate on one metric: - -```yaml -graders: - accuracy: # First metric - kind: tool - function: exact_match - extractor: last_assistant - - completeness: # Second metric - kind: rubric - prompt_path: completeness.txt - model: gpt-4o-mini - extractor: last_assistant - -gate: - metric_key: accuracy # Only gate on accuracy (completeness still computed) - metric: avg_score # Use average - op: gte # >= - value: 0.8 # 80% threshold -``` - -The evaluation passes/fails based on the gated metric, but results include scores for all metrics. 
- -## Understanding avg_score vs accuracy - -### avg_score -- Arithmetic mean of all scores -- Sensitive to partial credit -- Good for continuous evaluation - -Example: -- Scores: [1.0, 0.8, 0.6] -- avg_score = (1.0 + 0.8 + 0.6) / 3 = 0.8 - -### accuracy -- Percentage of samples meeting a threshold -- Binary pass/fail per sample -- Good for strict requirements - -Example: -- Scores: [1.0, 0.8, 0.6] -- pass_value: 0.7 -- Passing: [1.0, 0.8] = 2 out of 3 -- accuracy = 2/3 = 0.667 (66.7%) - -## Errors and Attempted Samples - -If a sample fails (error during evaluation), it: -- Gets a score of 0.0 -- Counts toward `total` but not `total_attempted` -- Included in `avg_score_total` but not `avg_score_attempted` - -You can gate on either: -- `avg_score_total`: Includes errors as 0.0 -- `avg_score_attempted`: Excludes errors (only successfully attempted samples) - - -**Note**: The `metric` field currently only supports `avg_score` and `accuracy`. By default, gates use `avg_score_attempted`. - - -## Gate Results - -After evaluation, you'll see: - -```text -✓ PASSED (2.25/3.00 avg, 75.0% pass rate) -``` - -or - -```text -✗ FAILED (1.80/3.00 avg, 60.0% pass rate) -``` - -The evaluation exit code reflects the gate result: -- 0: Passed -- 1: Failed - -## Advanced Gating - -For complex gating logic (e.g., "pass if accuracy `>= 80%` OR avg_score `>= 0.9`"), you'll need to: -1. Run evaluation with one gate -2. Examine the results JSON -3. 
Apply custom logic in a post-processing script - -## Next Steps - -- [Understanding Results](/evals/results-metrics/understanding-results) - Interpreting evaluation output -- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Using multiple graders -- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete gate configuration diff --git a/fern/pages/evals/concepts/graders.mdx b/fern/pages/evals/concepts/graders.mdx deleted file mode 100644 index bc0e498e..00000000 --- a/fern/pages/evals/concepts/graders.mdx +++ /dev/null @@ -1,330 +0,0 @@ -# Graders - -**Graders** are the scoring functions that evaluate agent responses. They take the extracted submission (from an extractor) and assign a score between 0.0 (complete failure) and 1.0 (perfect success). - - -**Quick overview:** -- **Two types**: Tool graders (deterministic Python functions) and Rubric graders (LLM-as-judge) -- **Built-in functions**: exact_match, contains, regex_match, ascii_printable_only -- **Custom graders**: Write your own grading logic -- **Multi-metric**: Combine multiple graders in one suite -- **Flexible extraction**: Each grader can use a different extractor - - -**When to use each:** -- **Tool graders**: Fast, deterministic, free - perfect for exact matching, patterns, tool validation -- **Rubric graders**: Flexible, subjective, costs API calls - ideal for quality, creativity, nuanced evaluation - -Graders evaluate agent responses and assign scores between 0.0 (complete failure) and 1.0 (perfect success). - -## Grader Types - -There are two types of graders: - -### Tool Graders - -Python functions that programmatically compare the submission to ground truth or apply deterministic checks. 
- -```yaml -graders: - accuracy: - kind: tool # Deterministic grading - function: exact_match # Built-in grading function - extractor: last_assistant # Use final agent response -``` - -Best for: -- Exact matching -- Pattern checking -- Tool call validation -- Deterministic criteria - -### Rubric Graders - -LLM-as-judge evaluation using custom prompts and criteria. Can use either direct LLM API calls or a Letta agent as the judge. - -**Standard rubric grading (LLM API):** -```yaml -graders: - quality: - kind: rubric # LLM-as-judge - prompt_path: rubric.txt # Custom evaluation criteria - model: gpt-4o-mini # Judge model - extractor: last_assistant # What to evaluate -``` - -**Agent-as-judge (Letta agent):** -```yaml -graders: - agent_judge: - kind: rubric # Still "rubric" kind - agent_file: judge.af # Judge agent with submit_grade tool - prompt_path: rubric.txt # Evaluation criteria - extractor: last_assistant # What to evaluate -``` - -Best for: -- Subjective quality assessment -- Open-ended responses -- Nuanced evaluation -- Complex criteria -- Judges that need tools (when using agent-as-judge) - -## Built-in Tool Graders - -### exact_match - -Checks if submission exactly matches ground truth (case-sensitive, whitespace-trimmed). - -```yaml -graders: - accuracy: - kind: tool - function: exact_match # Case-sensitive, whitespace-trimmed - extractor: last_assistant # Extract final response -``` - -Requires: `ground_truth` in dataset - -Score: 1.0 if exact match, 0.0 otherwise - -### contains - -Checks if submission contains ground truth (case-insensitive). - -```yaml -graders: - contains_answer: - kind: tool - function: contains # Case-insensitive substring match - extractor: last_assistant # Search in final response -``` - -Requires: `ground_truth` in dataset - -Score: 1.0 if found, 0.0 otherwise - -### regex_match - -Checks if submission matches a regex pattern in ground truth. 
- -```yaml -graders: - pattern: - kind: tool - function: regex_match # Pattern matching - extractor: last_assistant # Check final response -``` - -Dataset sample: -```json -{"input": "Generate a UUID", "ground_truth": "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"} -``` - -Score: 1.0 if pattern matches, 0.0 otherwise - -### ascii_printable_only - -Validates that all characters are printable ASCII (useful for ASCII art, formatted output). - -```yaml -graders: - ascii_check: - kind: tool - function: ascii_printable_only # Validate ASCII characters - extractor: last_assistant # Check final response -``` - -Does not require ground truth. - -Score: 1.0 if all characters are printable ASCII, 0.0 if any non-printable characters found - -## Rubric Graders - -Rubric graders use an LLM to evaluate responses based on custom criteria. - -### Basic Configuration - -```yaml -graders: - quality: - kind: rubric # LLM-as-judge - prompt_path: quality_rubric.txt # Evaluation criteria - model: gpt-4o-mini # Judge model - temperature: 0.0 # Deterministic - extractor: last_assistant # What to evaluate -``` - -### Rubric Prompt Format - -Your rubric file should describe the evaluation criteria. Use placeholders: - -- `{input}`: The original input from the dataset -- `{submission}`: The extracted agent response -- `{ground_truth}`: Ground truth from dataset (if available) - -Example `quality_rubric.txt`: -``` -Evaluate the response for: -1. Accuracy: Does it correctly answer the question? -2. Completeness: Is the answer thorough? -3. Clarity: Is it well-explained? 
- -Input: {input} -Expected: {ground_truth} -Response: {submission} - -Score from 0.0 to 1.0 where: -- 1.0: Perfect response -- 0.75: Good with minor issues -- 0.5: Acceptable but incomplete -- 0.25: Poor quality -- 0.0: Completely wrong -``` - -### Inline Prompt - -Instead of a file, you can include the prompt inline: - -```yaml -graders: - quality: - kind: rubric # LLM-as-judge - prompt: | # Inline prompt instead of file - Evaluate the creativity and originality of the response. - Score 1.0 for highly creative, 0.0 for generic or unoriginal. - model: gpt-4o-mini # Judge model - extractor: last_assistant # What to evaluate -``` - -### Model Configuration - -```yaml -graders: - quality: - kind: rubric - prompt_path: rubric.txt # Evaluation criteria - model: gpt-4o-mini # Judge model - temperature: 0.0 # Deterministic (0.0-2.0) - provider: openai # LLM provider (default: openai) - max_retries: 5 # API retry attempts - timeout: 120.0 # Request timeout in seconds -``` - -Supported providers: -- `openai` (default) - -Models: -- Any OpenAI-compatible model -- Special handling for reasoning models (o1, o3) - temperature automatically adjusted to 1.0 - -### Structured Output - -Rubric graders use JSON mode to get structured responses: - -```json -{ - "score": 0.85, - "rationale": "The response is accurate and complete but could be more concise." -} -``` - -The score is validated to be between 0.0 and 1.0. 
- -## Multi-Metric Configuration - -Evaluate multiple aspects in one suite: - -```yaml -graders: - accuracy: # Tool grader for factual correctness - kind: tool - function: contains - extractor: last_assistant - - completeness: # Rubric grader for thoroughness - kind: rubric - prompt_path: completeness_rubric.txt - model: gpt-4o-mini - extractor: last_assistant - - tool_usage: # Tool grader for tool call validation - kind: tool - function: exact_match - extractor: tool_arguments # Extract tool call args - extractor_config: - tool_name: search # Which tool to check -``` - -Each grader can use a different extractor. - -## Extractor Configuration - -Every grader must specify an `extractor` to select what to grade: - -```yaml -graders: - my_metric: - kind: tool - function: contains # Grading function - extractor: last_assistant # What to extract and grade -``` - -Some extractors need additional configuration: - -```yaml -graders: - tool_check: - kind: tool - function: contains # Check if ground truth in tool args - extractor: tool_arguments # Extract tool call arguments - extractor_config: # Configuration for this extractor - tool_name: search # Which tool to extract from -``` - -See [Extractors](/evals/core-concepts/extractors) for all available extractors. - -## Custom Graders - -You can write custom grading functions. See [Custom Graders](/evals/advanced/custom-graders) for details. 
- -## Grader Selection Guide - -| Use Case | Recommended Grader | -|----------|-------------------| -| Exact answer matching | `exact_match` | -| Keyword checking | `contains` | -| Pattern validation | `regex_match` | -| Tool call validation | `exact_match` with `tool_arguments` extractor | -| Quality assessment | Rubric grader | -| Creativity evaluation | Rubric grader | -| Format checking | Custom tool grader | -| Multi-criteria evaluation | Multiple graders | - -## Score Interpretation - -All scores are between 0.0 and 1.0: - -- **1.0**: Perfect - meets all criteria -- **0.75-0.99**: Good - minor issues -- **0.5-0.74**: Acceptable - notable gaps -- **0.25-0.49**: Poor - major problems -- **0.0-0.24**: Failed - did not meet criteria - -Tool graders typically return binary scores (0.0 or 1.0), while rubric graders can return any value in the range. - -## Error Handling - -If grading fails (e.g., network error, invalid format): -- Score is set to 0.0 -- Rationale includes error message -- Metadata includes error details - -This ensures evaluations can continue even with individual failures. - -## Next Steps - -- [Tool Graders](/evals/graders/tool-graders) - Built-in and custom functions -- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge details -- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Using multiple graders -- [Extractors](/evals/core-concepts/extractors) - Selecting what to grade diff --git a/fern/pages/evals/concepts/overview.mdx b/fern/pages/evals/concepts/overview.mdx deleted file mode 100644 index 9a83c0fa..00000000 --- a/fern/pages/evals/concepts/overview.mdx +++ /dev/null @@ -1,207 +0,0 @@ -# Core Concepts - -Understanding how Letta Evals works and what makes it different. - - -**Just want to run an eval?** Skip to [Getting Started](/evals/get-started/getting-started) for a hands-on quickstart. 
- - -## Built for Stateful Agents - -Letta Evals is a testing framework specifically designed for agents that maintain state. Unlike traditional eval frameworks built for simple input-output models, Letta Evals understands that agents: - -- Maintain memory across conversations -- Use tools and external functions -- Evolve their behavior based on interactions -- Have persistent context and state - -This means you can test aspects of your agent that other frameworks can't: memory updates, multi-turn conversations, tool usage patterns, and state evolution over time. - -## The Evaluation Flow - -Every evaluation follows this flow: - -**Dataset → Target (Agent) → Extractor → Grader → Gate → Result** - -1. **Dataset**: Your test cases (questions, scenarios, expected outputs) -2. **Target**: The agent being evaluated -3. **Extractor**: Pulls out the relevant information from the agent's response -4. **Grader**: Scores the extracted information -5. **Gate**: Pass/fail criteria for the overall evaluation -6. **Result**: Metrics, scores, and detailed results - -### What You Can Test - -With Letta Evals, you can test aspects of agents that traditional frameworks can't: - -- **Memory updates**: Did the agent correctly remember the user's name? -- **Multi-turn conversations**: Can the agent maintain context across multiple exchanges? -- **Tool usage**: Does the agent call the right tools with the right arguments? -- **State evolution**: How does the agent's internal state change over time? - - -**Example: Testing Memory Updates** - -```yaml -graders: - memory_check: - kind: tool # Deterministic grading - function: contains # Check if ground_truth in extracted content - extractor: memory_block # Extract from agent memory (not just response!) 
- extractor_config: - block_label: human # Which memory block to check -``` - -Dataset: -```jsonl -{"input": "Please remember that I like bananas.", "ground_truth": "bananas"} -``` - -This doesn't just check if the agent responded correctly - it verifies the agent actually stored "bananas" in its memory block. Traditional eval frameworks can't inspect agent state like this. - - -## Why Evals Matter - -AI agents are complex systems that can behave unpredictably. Without systematic evaluation, you can't: -- **Know if changes improve or break your agent** - Did that prompt tweak help or hurt? -- **Prevent regressions** - Catch when "fixes" break existing functionality -- **Compare approaches objectively** - Which model works better for your use case? -- **Build confidence before deployment** - Ensure quality before shipping to users -- **Track improvement over time** - Measure progress as you iterate - -Manual testing doesn't scale. Evals let you test hundreds of scenarios in minutes. - -## What Evals Are Useful For - -### 1. Development & Iteration -- Test prompt changes instantly across your entire test suite -- Experiment with different models and compare results -- Validate that new features work as expected - -### 2. Quality Assurance -- Prevent regressions when modifying agent behavior -- Ensure agents handle edge cases correctly -- Verify tool usage and memory updates - -### 3. Model Selection -- Compare GPT-4 vs Claude vs other models on your specific use case -- Test different model configurations (temperature, system prompts, etc.) -- Find the right cost/performance tradeoff - -### 4. Benchmarking -- Measure agent performance on standard tasks -- Track improvements over time -- Share reproducible results with your team - -### 5. 
Production Readiness -- Validate agents meet quality thresholds before deployment -- Run continuous evaluation in CI/CD pipelines -- Monitor production agent quality - -## How Letta Evals Works - -Letta Evals is built around a few key concepts that work together to create a flexible evaluation framework. - -## Key Components - -### Suite - -An **evaluation suite** is a complete test configuration defined in a YAML file. It ties together: -- Which dataset to use -- Which agent to test -- How to grade responses -- What criteria determine pass/fail - -Think of a suite as a reusable test specification. - -### Dataset - -A **dataset** is a JSONL file where each line represents one test case. Each sample has: -- An input (what to ask the agent) -- Optional ground truth (the expected answer) -- Optional metadata (tags, custom fields) - -### Target - -The **target** is what you're evaluating. Currently, this is a Letta agent, specified by: -- An agent file (.af) -- An existing agent ID -- A Python script that creates agents programmatically - -### Trajectory - -A **trajectory** is the complete conversation history from one test case. It's a list of turns, where each turn contains a list of Letta messages (assistant messages, tool calls, tool returns, etc.). - -### Extractor - -An **extractor** determines what part of the trajectory to evaluate. For example: -- The last thing the agent said -- All tool calls made -- Content from agent memory -- Text matching a pattern - -### Grader - -A **grader** scores how well the agent performed. There are two types: -- **Tool graders**: Python functions that compare submission to ground truth -- **Rubric graders**: LLM judges that evaluate based on custom criteria - -### Gate - -A **gate** is the pass/fail threshold for your evaluation. It compares aggregate metrics (like average score or pass rate) against a target value. 
- -## Multi-Metric Evaluation - -You can define multiple graders in one suite to evaluate different aspects: - -```yaml -graders: - accuracy: # Check if answer is correct - kind: tool - function: exact_match - extractor: last_assistant # Use final response - - tool_usage: # Check if agent called the right tool - kind: tool - function: contains - extractor: tool_arguments # Extract tool call args - extractor_config: - tool_name: search # From search tool -``` - -The gate can check any of these metrics: - -```yaml -gate: - metric_key: accuracy # Gate on accuracy (tool_usage still computed) - op: gte # >= - value: 0.8 # 80% threshold -``` - -## Score Normalization - -All scores are normalized to the range [0.0, 1.0]: -- 0.0 = complete failure -- 1.0 = perfect success -- Values in between = partial credit - -This allows different grader types to be compared and combined. - -## Aggregate Metrics - -Individual sample scores are aggregated in two ways: - -1. **Average Score**: Mean of all scores (0.0 to 1.0) -2. **Accuracy/Pass Rate**: Percentage of samples passing a threshold - -You can gate on either metric type. - -## Next Steps - -Dive deeper into each concept: -- [Suites](/evals/core-concepts/suites) - Suite configuration in detail -- [Datasets](/evals/core-concepts/datasets) - Creating effective test datasets -- [Targets](/evals/core-concepts/targets) - Agent configuration options -- [Graders](/evals/core-concepts/graders) - Understanding grader types -- [Extractors](/evals/core-concepts/extractors) - Extraction strategies -- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria diff --git a/fern/pages/evals/concepts/suites.mdx b/fern/pages/evals/concepts/suites.mdx deleted file mode 100644 index 132d46c3..00000000 --- a/fern/pages/evals/concepts/suites.mdx +++ /dev/null @@ -1,275 +0,0 @@ -# Suites - -A **suite** is a YAML configuration file that defines a complete evaluation specification. 
It's the central piece that ties together your dataset, target agent, grading criteria, and pass/fail thresholds. - - -**Quick overview:** -- **Single file defines everything**: Dataset, agent, graders, and success criteria all in one YAML -- **Reusable and shareable**: Version control your evaluation specs alongside your code -- **Multi-metric support**: Evaluate multiple aspects (accuracy, quality, tool usage) in one run -- **Multi-model testing**: Run the same suite across different LLM models -- **Flexible filtering**: Test subsets using tags or sample limits - - -**Typical workflow:** -1. Create a suite YAML defining what and how to test -2. Run `letta-evals run suite.yaml` -3. Review results showing scores for each metric -4. Suite passes or fails based on gate criteria - -An evaluation suite is a YAML configuration file that defines a complete test specification. - -## Basic Structure - -```yaml -name: my-evaluation # Suite identifier -description: Optional description of what this tests # Human-readable explanation -dataset: path/to/dataset.jsonl # Test cases - -target: # What agent to evaluate - kind: agent - agent_file: agent.af # Agent to test - base_url: http://localhost:8283 # Letta server - -graders: # How to evaluate responses - my_metric: - kind: tool # Deterministic grading - function: exact_match # Grading function - extractor: last_assistant # What to extract from agent response - -gate: # Pass/fail criteria - metric_key: my_metric # Which metric to check - op: gte # Greater than or equal - value: 0.8 # 80% threshold -``` - -## Required Fields - -### name -The name of your evaluation suite. Used in output and results. - -```yaml -name: question-answering-eval -``` - -### dataset -Path to the JSONL or CSV dataset file. Can be relative (to the suite YAML) or absolute. - -```yaml -dataset: ./datasets/qa.jsonl # Relative to suite YAML location -``` - -### target -Specifies what agent to evaluate. 
See [Targets](/evals/core-concepts/targets) for details. - -### graders -One or more graders to evaluate agent performance. See [Graders](/evals/core-concepts/graders) for details. - -### gate -Pass/fail criteria. See [Gates](/evals/core-concepts/gates) for details. - -## Optional Fields - -### description -A human-readable description of what this suite tests: - -```yaml -description: Tests the agent's ability to answer factual questions accurately -``` - -### max_samples -Limit the number of samples to evaluate (useful for quick tests): - -```yaml -max_samples: 10 # Only evaluate first 10 samples -``` - -### sample_tags -Filter samples by tags (only evaluate samples with these tags): - -```yaml -sample_tags: [math, easy] # Only samples tagged with "math" AND "easy" -``` - -Dataset samples can include tags: -```jsonl -{"input": "What is 2+2?", "ground_truth": "4", "tags": ["math", "easy"]} -``` - -### num_runs -Number of times to run the entire evaluation suite (useful for testing non-deterministic behavior): - -```yaml -num_runs: 5 # Run the evaluation 5 times -``` - -Default: 1 - -### setup_script -Path to a Python script with a setup function to run before evaluation: - -```yaml -setup_script: setup.py:prepare_environment # script.py:function_name -``` - -The setup function should have this signature: -```python -def prepare_environment(suite: SuiteSpec) -> None: - # Setup code here - pass -``` - -## Path Resolution - -Paths in the suite YAML are resolved relative to the YAML file location: - -``` -project/ -├── suite.yaml -├── dataset.jsonl -└── agents/ - └── my_agent.af -``` - -```yaml -# In suite.yaml -dataset: dataset.jsonl # Resolves to project/dataset.jsonl -target: - agent_file: agents/my_agent.af # Resolves to project/agents/my_agent.af -``` - -Absolute paths are used as-is. 
- -## Multi-Grader Suites - -You can evaluate multiple metrics in one suite: - -```yaml -graders: - accuracy: # Check if answer is correct - kind: tool - function: exact_match - extractor: last_assistant - - completeness: # LLM judges response quality - kind: rubric - prompt_path: rubrics/completeness.txt - model: gpt-4o-mini - extractor: last_assistant - - tool_usage: # Verify correct tool was called - kind: tool - function: contains - extractor: tool_arguments # Extract tool call arguments -``` - -The gate can check any of these metrics: - -```yaml -gate: - metric_key: accuracy # Gate on accuracy metric (others still computed) - op: gte # Greater than or equal - value: 0.9 # 90% threshold -``` - -Results will include scores for all graders, even if you only gate on one. - -## Examples - -### Simple Tool Grader Suite - -```yaml -name: basic-qa # Suite name -dataset: questions.jsonl # Test questions - -target: - kind: agent - agent_file: qa_agent.af # Pre-configured agent - base_url: http://localhost:8283 # Local server - -graders: - accuracy: # Single metric - kind: tool # Deterministic grading - function: contains # Check if ground truth is in response - extractor: last_assistant # Use final agent message - -gate: - metric_key: accuracy # Gate on this metric - op: gte # Must be >= - value: 0.75 # 75% to pass -``` - -### Rubric Grader Suite - -```yaml -name: quality-eval # Quality evaluation -dataset: prompts.jsonl # Test prompts - -target: - kind: agent - agent_id: existing-agent-123 # Use existing agent - base_url: https://api.letta.com # Letta Cloud - -graders: - quality: # LLM-as-judge metric - kind: rubric # Subjective evaluation - prompt_path: quality_rubric.txt # Rubric template - model: gpt-4o-mini # Judge model - temperature: 0.0 # Deterministic - extractor: last_assistant # Evaluate final response - -gate: - metric_key: quality # Gate on this metric - metric: avg_score # Use average score - op: gte # Must be >= - value: 0.7 # 70% to pass -``` - -### 
Multi-Model Suite - -Test the same agent configuration across different models: - -```yaml -name: model-comparison # Compare model performance -dataset: test.jsonl # Same test for all models - -target: - kind: agent - agent_file: agent.af # Same agent configuration - base_url: http://localhost:8283 # Local server - model_configs: [gpt-4o-mini, claude-3-5-sonnet] # Test both models - -graders: - accuracy: # Single metric for comparison - kind: tool - function: exact_match - extractor: last_assistant - -gate: - metric_key: accuracy # Both models must pass this - op: gte # Must be >= - value: 0.8 # 80% threshold -``` - -Results will show per-model metrics. - -## Validation - -Validate your suite configuration before running: - -```bash -letta-evals validate suite.yaml -``` - -This checks: -- Required fields are present -- Paths exist -- Configuration is valid -- Grader/extractor combinations are compatible - -## Next Steps - -- [Dataset Configuration](/evals/core-concepts/datasets) -- [Target Configuration](/evals/core-concepts/targets) -- [Grader Configuration](/evals/core-concepts/graders) -- [Gate Configuration](/evals/core-concepts/gates) diff --git a/fern/pages/evals/concepts/targets.mdx b/fern/pages/evals/concepts/targets.mdx deleted file mode 100644 index f5dfb1d2..00000000 --- a/fern/pages/evals/concepts/targets.mdx +++ /dev/null @@ -1,329 +0,0 @@ -# Targets - -A **target** is the agent you're evaluating. In Letta Evals, the target configuration determines how agents are created, accessed, and tested. 
- - -**Quick overview:** -- **Three ways to specify agents**: agent file (`.af`), existing agent ID, or programmatic creation script -- **Critical distinction**: `agent_file`/`agent_script` create fresh agents per sample (isolated tests), while `agent_id` uses one agent for all samples (stateful conversation) -- **Multi-model support**: Test the same agent configuration across different LLM models -- **Flexible connection**: Connect to local Letta servers or Letta Cloud - - -**When to use each approach:** -- `agent_file` - Pre-configured agents saved as `.af` files (most common) -- `agent_id` - Testing existing agents or multi-turn conversations with state -- `agent_script` - Dynamic agent creation with per-sample customization - -The target configuration specifies how to create or access the agent for evaluation. - -## Target Configuration - -All targets have a `kind` field (currently only `agent` is supported): - -```yaml -target: - kind: agent # Currently only "agent" is supported - # ... agent-specific configuration -``` - -## Agent Sources - -You must specify exactly ONE of these: - -### agent_file - -Path to a `.af` (Agent File) to upload: - -```yaml -target: - kind: agent - agent_file: path/to/agent.af # Path to .af file - base_url: http://localhost:8283 # Letta server URL -``` - -The agent file will be uploaded to the Letta server and a new agent created for the evaluation. - -### agent_id - -ID of an existing agent on the server: - -```yaml -target: - kind: agent - agent_id: agent-123-abc # ID of existing agent - base_url: http://localhost:8283 # Letta server URL -``` - - -**Modifies agent in-place:** Using `agent_id` will modify your agent's state, memory, and message history during evaluation. The same agent instance is used for all samples, processing them sequentially. **Do not use production agents or agents you don't want to modify.** Use `agent_file` or `agent_script` for reproducible, isolated testing. 
- - -### agent_script - -Path to a Python script with an agent factory function for programmatic agent creation: - -```yaml -target: - kind: agent - agent_script: create_agent.py:create_inventory_agent # script.py:function_name - base_url: http://localhost:8283 # Letta server URL -``` - -Format: `path/to/script.py:function_name` - -The function must be decorated with `@agent_factory` and have the signature `async (client: AsyncLetta, sample: Sample) -> str`: - -```python -from letta_client import AsyncLetta, CreateBlock -from letta_evals.decorators import agent_factory -from letta_evals.models import Sample - -@agent_factory -async def create_inventory_agent(client: AsyncLetta, sample: Sample) -> str: - """Create and return agent ID for this sample.""" - # Access custom arguments from the dataset - item = sample.agent_args.get("item", {}) - - # Create agent with sample-specific configuration - agent = await client.agents.create( - name="inventory-assistant", - memory_blocks=[ - CreateBlock( - label="item_context", - value=f"Item: {item.get('name', 'Unknown')}" - ) - ], - agent_type="letta_v1_agent", - model="openai/gpt-4.1-mini", - embedding="openai/text-embedding-3-small", - ) - - return agent.id -``` - -**Key features:** -- Creates a fresh agent for each sample -- Can customize agents using `sample.agent_args` from the dataset -- Allows testing agent creation logic itself -- Useful when you don't have pre-saved agent files - -**When to use:** -- Testing agent creation workflows -- Dynamic per-sample agent configuration -- Agents that need sample-specific memory or tools -- Programmatic agent testing - -## Connection Configuration - -### base_url - -Letta server URL: - -```yaml -target: - base_url: http://localhost:8283 # Local Letta server - # or - base_url: https://api.letta.com # Letta Cloud -``` - -Default: `http://localhost:8283` - -### api_key - -API key for authentication (required for Letta Cloud): - -```yaml -target: - api_key: your-api-key-here # 
Required for Letta Cloud -``` - -Or set via environment variable: -```bash -export LETTA_API_KEY=your-api-key-here -``` - -### project_id - -Letta project ID (for Letta Cloud): - -```yaml -target: - project_id: proj_abc123 # Letta Cloud project -``` - -Or set via environment variable: -```bash -export LETTA_PROJECT_ID=proj_abc123 -``` - -### timeout - -Request timeout in seconds: - -```yaml -target: - timeout: 300.0 # Request timeout (5 minutes) -``` - -Default: 300 seconds - -## Multi-Model Evaluation - -Test the same agent across different models: - -### model_configs - -List of model configuration names from JSON files: - -```yaml -target: - kind: agent - agent_file: agent.af - model_configs: [gpt-4o-mini, claude-3-5-sonnet] # Test with both models -``` - -The evaluation will run once for each model config. Model configs are JSON files in `letta_evals/llm_model_configs/`. - -### model_handles - -List of model handles (cloud-compatible identifiers): - -```yaml -target: - kind: agent - agent_file: agent.af - model_handles: ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet"] # Cloud model identifiers -``` - -Use this for Letta Cloud deployments. - - -**Note**: You cannot specify both `model_configs` and `model_handles`. 
- - -## Complete Examples - -### Local Development - -```yaml -target: - kind: agent - agent_file: ./agents/my_agent.af # Pre-configured agent - base_url: http://localhost:8283 # Local server -``` - -### Letta Cloud - -```yaml -target: - kind: agent - agent_id: agent-cloud-123 # Existing cloud agent - base_url: https://api.letta.com # Letta Cloud - api_key: ${LETTA_API_KEY} # From environment variable - project_id: proj_abc # Your project ID -``` - -### Multi-Model Testing - -```yaml -target: - kind: agent - agent_file: agent.af # Same agent configuration - base_url: http://localhost:8283 # Local server - model_configs: [gpt-4o-mini, gpt-4o, claude-3-5-sonnet] # Test 3 models -``` - -Results will include per-model metrics: -``` -Model: gpt-4o-mini - Avg: 0.85, Pass: 85.0% -Model: gpt-4o - Avg: 0.92, Pass: 92.0% -Model: claude-3-5-sonnet - Avg: 0.88, Pass: 88.0% -``` - -### Programmatic Agent Creation - -```yaml -target: - kind: agent - agent_script: setup.py:CustomAgentFactory # Programmatic creation - base_url: http://localhost:8283 # Local server -``` - -## Environment Variable Precedence - -Configuration values are resolved in this order (highest priority first): - -1. CLI arguments (`--api-key`, `--base-url`, `--project-id`) -2. Suite YAML configuration -3. Environment variables (`LETTA_API_KEY`, `LETTA_BASE_URL`, `LETTA_PROJECT_ID`) - -## Agent Lifecycle and Testing Behavior - -The way your agent is specified fundamentally changes how the evaluation runs: - -### With agent_file or agent_script: Independent Testing - -**Agent lifecycle:** -1. A fresh agent instance is created for each sample -2. Agent processes the sample input(s) -3. Agent remains on the server after the sample completes - -**Testing behavior:** Each sample is an independent, isolated test. Agent state (memory, message history) does not carry over between samples. This enables parallel execution and ensures reproducible results. 
- -**Use cases:** -- Testing how the agent responds to various independent inputs -- Ensuring consistent behavior across different scenarios -- Regression testing where each case should be isolated -- Evaluating agent responses without prior context - - -**Example:** If you have 10 test cases, 10 separate agent instances will be created (one per test case), and they can run in parallel. - - -### With agent_id: Sequential Script Testing - -**Agent lifecycle:** -1. The same agent instance is used for all samples -2. Agent processes each sample in sequence -3. Agent state persists throughout the entire evaluation - -**Testing behavior:** The dataset becomes a conversation script where each sample builds on previous ones. Agent memory and message history accumulate, and earlier interactions affect later responses. Samples must execute sequentially. - -**Use cases:** -- Testing multi-turn conversations with context -- Evaluating how agent memory evolves over time -- Simulating a single user session with multiple interactions -- Testing scenarios where context should accumulate - - -**Example:** If you have 10 test cases, they all run against the same agent instance in order, with state carrying over between each test. - - -### Critical Differences - -| Aspect | agent_file / agent_script | agent_id | -|--------|---------------------------|----------| -| **Agent instances** | New agent per sample | Same agent for all samples | -| **State isolation** | Fully isolated | State carries over | -| **Execution** | Can run in parallel | Must run sequentially | -| **Memory** | Fresh for each sample | Accumulates across samples | -| **Use case** | Independent test cases | Conversation scripts | -| **Reproducibility** | Highly reproducible | Depends on execution order | - - -**Best practice:** Use `agent_file` or `agent_script` for most evaluations to ensure reproducible, isolated tests. 
Use `agent_id` only when you specifically need to test how agent state evolves across multiple interactions. - - -## Validation - -The runner validates: -- Exactly one of `agent_file`, `agent_id`, or `agent_script` is specified -- Agent files have `.af` extension -- Agent script paths are valid - -## Next Steps - -- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete target configuration options -- [Datasets](/evals/core-concepts/datasets) - Using agent_args for sample-specific configuration -- [Getting Started](/evals/get-started/getting-started) - Complete tutorial with target examples diff --git a/fern/pages/evals/configuration/suite-yaml.mdx b/fern/pages/evals/configuration/suite-yaml.mdx deleted file mode 100644 index 440e44cf..00000000 --- a/fern/pages/evals/configuration/suite-yaml.mdx +++ /dev/null @@ -1,427 +0,0 @@ -# Suite YAML Reference - -Complete reference for suite configuration files. - -A **suite** is a YAML file that defines an evaluation: what agent to test, what dataset to use, how to grade responses, and what criteria determine pass/fail. This is your evaluation specification. - - -**Quick overview:** -- **name**: Identifier for your evaluation -- **dataset**: JSONL file with test cases -- **target**: Which agent to evaluate (via file, ID, or script) -- **graders**: How to score responses (tool or rubric graders) -- **gate**: Pass/fail criteria - - -See [Getting Started](/evals/get-started/getting-started) for a tutorial, or [Core Concepts](/evals/core-concepts/suites) for conceptual overview. 
- -## File Structure - -```yaml -name: string (required) -description: string (optional) -dataset: path (required) -max_samples: integer (optional) -sample_tags: array (optional) -num_runs: integer (optional) -setup_script: string (optional) - -target: object (required) - kind: "agent" - base_url: string - api_key: string - timeout: float - project_id: string - agent_id: string (one of: agent_id, agent_file, agent_script) - agent_file: path - agent_script: string - model_configs: array - model_handles: array - -graders: object (required) - : object - kind: "tool" | "rubric" - display_name: string - extractor: string - extractor_config: object - # Tool grader fields - function: string - # Rubric grader fields (LLM API) - prompt: string - prompt_path: path - model: string - temperature: float - provider: string - max_retries: integer - timeout: float - rubric_vars: array - # Rubric grader fields (agent-as-judge) - agent_file: path - judge_tool_name: string - -gate: object (required) - metric_key: string - metric: "avg_score" | "accuracy" - op: "gte" | "gt" | "lte" | "lt" | "eq" - value: float - pass_op: "gte" | "gt" | "lte" | "lt" | "eq" - pass_value: float -``` - -## Top-Level Fields - -### name (required) - -Suite name, used in output and results. - -**Type**: string - -```yaml -name: question-answering-eval -``` - -### description (optional) - -Human-readable description of what the suite tests. - -**Type**: string - -```yaml -description: Tests agent's ability to answer factual questions accurately -``` - -### dataset (required) - -Path to JSONL dataset file. Relative paths are resolved from the suite YAML location. - -**Type**: path (string) - -```yaml -dataset: ./datasets/qa.jsonl -dataset: /absolute/path/to/dataset.jsonl -``` - -### max_samples (optional) - -Limit the number of samples to evaluate. Useful for quick tests. 
- -**Type**: integer | **Default**: All samples - -```yaml -max_samples: 10 # Only evaluate first 10 samples -``` - -### sample_tags (optional) - -Filter samples by tags. Only samples with ALL specified tags are evaluated. - -**Type**: array of strings - -```yaml -sample_tags: [math, easy] # Only samples tagged with both -``` - -### num_runs (optional) - -Number of times to run the evaluation suite. - -**Type**: integer | **Default**: 1 - -```yaml -num_runs: 5 # Run the evaluation 5 times -``` - -### setup_script (optional) - -Path to Python script with setup function. - -**Type**: string (format: `path/to/script.py:function_name`) - -```yaml -setup_script: setup.py:prepare_environment -``` - -## target (required) - -Configuration for the agent being evaluated. - -### kind (required) - -Type of target. Currently only `"agent"` is supported. - -```yaml -target: - kind: agent -``` - -### base_url (optional) - -Letta server URL. **Default**: `http://localhost:8283` - -```yaml -target: - base_url: http://localhost:8283 - # or - base_url: https://api.letta.com -``` - -### api_key (optional) - -API key for Letta authentication. Can also be set via `LETTA_API_KEY` environment variable. - -```yaml -target: - api_key: your-api-key-here -``` - -### timeout (optional) - -Request timeout in seconds. **Default**: 300.0 - -```yaml -target: - timeout: 600.0 # 10 minutes -``` - -### Agent Source (required, pick one) - -Exactly one of these must be specified: - -#### agent_id - -ID of existing agent on the server. - -```yaml -target: - agent_id: agent-123-abc -``` - -#### agent_file - -Path to `.af` agent file. - -```yaml -target: - agent_file: ./agents/my_agent.af -``` - -#### agent_script - -Path to Python script with agent factory. - -```yaml -target: - agent_script: factory.py:MyAgentFactory -``` - -See [Targets](/evals/core-concepts/targets) for details on agent sources. - -### model_configs (optional) - -List of model configuration names to test. 
Cannot be used with `model_handles`. - -```yaml -target: - model_configs: [gpt-4o-mini, claude-3-5-sonnet] -``` - -### model_handles (optional) - -List of model handles for cloud deployments. Cannot be used with `model_configs`. - -```yaml -target: - model_handles: ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet"] -``` - -## graders (required) - -One or more graders, each with a unique key. - -### kind (required) - -Grader type: `"tool"` or `"rubric"`. - -```yaml -graders: - my_metric: - kind: tool -``` - -### extractor (required) - -Name of the extractor to use. - -```yaml -graders: - my_metric: - extractor: last_assistant -``` - -### Tool Grader Fields - -#### function (required for tool graders) - -Name of the grading function. - -```yaml -graders: - accuracy: - kind: tool - function: exact_match -``` - -### Rubric Grader Fields - -#### prompt or prompt_path (required) - -Inline rubric prompt or path to rubric file. - -```yaml -graders: - quality: - kind: rubric - prompt: | - Evaluate response quality from 0.0 to 1.0. -``` - -#### model (optional) - -LLM model for judging. **Default**: `gpt-4o-mini` - -```yaml -graders: - quality: - kind: rubric - model: gpt-4o -``` - -#### temperature (optional) - -Temperature for LLM generation. **Default**: 0.0 - -```yaml -graders: - quality: - kind: rubric - temperature: 0.0 -``` - -#### agent_file (agent-as-judge) - -Path to `.af` agent file to use as judge. - -```yaml -graders: - agent_judge: - kind: rubric - agent_file: judge.af - prompt_path: rubric.txt -``` - -## gate (required) - -Pass/fail criteria for the evaluation. - -### metric_key (optional) - -Which grader to evaluate. If only one grader, this can be omitted. - -```yaml -gate: - metric_key: accuracy -``` - -### metric (optional) - -Which aggregate to compare: `avg_score` or `accuracy`. 
**Default**: `avg_score` - -```yaml -gate: - metric: avg_score -``` - -### op (required) - -Comparison operator: `gte`, `gt`, `lte`, `lt`, `eq` - -```yaml -gate: - op: gte # Greater than or equal -``` - -### value (required) - -Threshold value for comparison (0.0 to 1.0). - -```yaml -gate: - value: 0.8 # Require >= 0.8 -``` - -## Complete Examples - -### Minimal Suite - -```yaml -name: basic-eval -dataset: dataset.jsonl - -target: - kind: agent - agent_file: agent.af - -graders: - accuracy: - kind: tool - function: exact_match - extractor: last_assistant - -gate: - op: gte - value: 0.8 -``` - -### Multi-Metric Suite - -```yaml -name: comprehensive-eval -description: Tests accuracy and quality -dataset: test_data.jsonl - -target: - kind: agent - agent_file: agent.af - -graders: - accuracy: - kind: tool - function: contains - extractor: last_assistant - - quality: - kind: rubric - prompt_path: rubrics/quality.txt - model: gpt-4o-mini - extractor: last_assistant - -gate: - metric_key: accuracy - op: gte - value: 0.85 -``` - -## Validation - -Validate your suite before running: - -```bash -letta-evals validate suite.yaml -``` - -## Next Steps - -- [Targets](/evals/core-concepts/targets) - Understanding agent sources and configuration -- [Graders](/evals/core-concepts/graders) - Tool graders vs rubric graders -- [Extractors](/evals/core-concepts/extractors) - What to extract from agent responses -- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria diff --git a/fern/pages/evals/extractors/builtin.mdx b/fern/pages/evals/extractors/builtin.mdx deleted file mode 100644 index b8be5d48..00000000 --- a/fern/pages/evals/extractors/builtin.mdx +++ /dev/null @@ -1,96 +0,0 @@ -# Built-in Extractors - -Letta Evals provides a set of built-in extractors that cover the most common extraction needs. - - -**What are extractors?** Extractors determine what part of an agent's response gets evaluated. 
They take the full conversation trajectory and extract just the piece you want to grade. - - -## Common Extractors - -### last_assistant - -Extracts the last assistant message content. - -```yaml -extractor: last_assistant # Most common - gets final response -``` - -### first_assistant - -Extracts the first assistant message content. - -```yaml -extractor: first_assistant -``` - -### all_assistant - -Concatenates all assistant messages with a separator. - -```yaml -extractor: all_assistant -extractor_config: - separator: "\n\n" # Join messages with double newline -``` - -### pattern - -Extracts content matching a regex pattern. - -```yaml -extractor: pattern -extractor_config: - pattern: 'Result: (\d+)' # Regex pattern to match - group: 1 # Extract capture group 1 -``` - -### tool_arguments - -Extracts arguments from a specific tool call. - -```yaml -extractor: tool_arguments -extractor_config: - tool_name: search # Which tool to extract from -``` - -### tool_output - -Extracts the return value from a specific tool call. - -```yaml -extractor: tool_output -extractor_config: - tool_name: search -``` - -### memory_block - -Extracts content from a specific memory block. - -```yaml -extractor: memory_block -extractor_config: - block_label: human # Which memory block to extract -``` - - -**Important**: This extractor requires the agent's final state, which adds overhead. - - -### after_marker - -Extracts content after a specific marker string. 
- -```yaml -extractor: after_marker -extractor_config: - marker: "ANSWER:" - include_marker: false -``` - -## Next Steps - -- [Custom Extractors](/evals/extractors/custom-extractors) - Write your own extractors -- [Extractors Concept](/evals/core-concepts/extractors) - Understanding extractors diff --git a/fern/pages/evals/extractors/custom.mdx b/fern/pages/evals/extractors/custom.mdx deleted file mode 100644 index 1a16b475..00000000 --- a/fern/pages/evals/extractors/custom.mdx +++ /dev/null @@ -1,55 +0,0 @@ -# Custom Extractors - -Create your own extractors to pull exactly what you need from agent trajectories. - - -While built-in extractors cover common cases, custom extractors let you implement specialized extraction logic for your specific use case. - - -## Why Custom Extractors? - -Use custom extractors when you need to: -- **Extract structured data**: Parse JSON fields from agent responses -- **Filter specific patterns**: Extract code blocks, URLs, or formatted content -- **Combine data sources**: Merge information from multiple messages or memory blocks -- **Count occurrences**: Track how many times something happened -- **Complex logic**: Implement domain-specific extraction - -## Basic Structure - -```python -from letta_evals.decorators import extractor -from letta_client import LettaMessageUnion -from typing import List - -@extractor -def my_extractor(trajectory: List[List[LettaMessageUnion]], config: dict) -> str: - """Extract custom content from trajectory.""" - # Your extraction logic here - return extracted_text -``` - -## Example: Extract Memory Insert - -```python -from letta_evals.decorators import extractor - -@extractor -def memory_insert_args(trajectory, config): - """Extract arguments from memory_insert tool calls.""" - for turn in trajectory: - for message in turn: - if hasattr(message, 'tool_call') and message.tool_call: - if message.tool_call.name == "memory_insert": - return str(message.tool_call.arguments) - return "" -``` - -## 
Registration - -Custom extractors are automatically registered when you import them in your suite's setup script or custom evaluators file. - -## Next Steps - -- [Built-in Extractors](/evals/extractors/built-in-extractors) - Available extractors -- [Extractors Concept](/evals/core-concepts/extractors) - Understanding extractors diff --git a/fern/pages/evals/getting-started.mdx b/fern/pages/evals/getting-started.mdx deleted file mode 100644 index 728bac79..00000000 --- a/fern/pages/evals/getting-started.mdx +++ /dev/null @@ -1,264 +0,0 @@ -# Getting Started - -Run your first Letta agent evaluation in 5 minutes. - -## Prerequisites - -- Python 3.11 or higher -- A running Letta server (local or Letta Cloud) -- A Letta agent to test, either in agent file format or by ID (see [Targets](/evals/core-concepts/targets) for more details) - -## Installation - -```bash -pip install letta-evals -``` - -Or with uv: - -```bash -uv pip install letta-evals -``` - -## Getting an Agent to Test - -Export an existing agent to a file using the Letta SDK: - -```python -from letta_client import Letta -import os - -client = Letta( - base_url="http://localhost:8283", # or https://api.letta.com for Letta Cloud - token=os.getenv("LETTA_API_KEY") # required for Letta Cloud -) - -# Export an agent to a file -agent_file = client.agents.export_file(agent_id="agent-123") - -# Save to disk -with open("my_agent.af", "w") as f: - f.write(agent_file) -``` - -Or export via the Agent Development Environment (ADE) by selecting "Export Agent". - -Then reference it in your suite: - -```yaml -target: - kind: agent - agent_file: my_agent.af -``` - - -**Other options:** You can also use existing agents by ID or programmatically generate agents. See [Targets](/evals/core-concepts/targets) for all agent configuration options. - - -## Quick Start - -Let's create your first evaluation in 3 steps: - -### 1. 
Create a Test Dataset - -Create a file named `dataset.jsonl`: - -```jsonl -{"input": "What's the capital of France?", "ground_truth": "Paris"} -{"input": "Calculate 2+2", "ground_truth": "4"} -{"input": "What color is the sky?", "ground_truth": "blue"} -``` - -Each line is a JSON object with: -- `input`: The prompt to send to your agent -- `ground_truth`: The expected answer (used for grading) - - -`ground_truth` is optional for some graders (like rubric graders), but required for tool graders like `contains` and `exact_match`. - - -Read more about [Datasets](/evals/core-concepts/datasets) for details on how to create your dataset. - -### 2. Create a Suite Configuration - -Create a file named `suite.yaml`: - -```yaml -name: my-first-eval -dataset: dataset.jsonl - -target: - kind: agent - agent_file: my_agent.af # Path to your agent file - base_url: http://localhost:8283 # Your Letta server - -graders: - quality: - kind: tool - function: contains # Check if response contains the ground truth - extractor: last_assistant # Use the last assistant message - -gate: - metric_key: quality - op: gte - value: 0.75 # Require 75% pass rate -``` - -The suite configuration defines: -- The [dataset](/evals/core-concepts/datasets) to use -- The [agent](/evals/core-concepts/targets) to test -- The [graders](/evals/core-concepts/graders) to use -- The [gate](/evals/core-concepts/gates) criteria - -Read more about [Suites](/evals/core-concepts/suites) for details on how to configure your evaluation. - -### 3. Run the Evaluation - -Run your evaluation with the following command: - -```bash -letta-evals run suite.yaml -``` - -You'll see real-time progress as your evaluation runs: - -``` -Running evaluation: my-first-eval -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3/3 100% -✓ PASSED (2.25/3.00 avg, 75.0% pass rate) -``` - -Read more about [CLI Commands](/evals/cli-reference/commands) for details about the available commands and options. 
- -## Understanding the Results - -The core evaluation flow is: - -**Dataset → Target (Agent) → Extractor → Grader → Gate → Result** - -The evaluation runner: -1. Loads your dataset -2. Sends each input to your agent (Target) -3. Extracts the relevant information (using the Extractor) -4. Grades the response (using the Grader function) -5. Computes aggregate metrics -6. Checks if metrics pass the Gate criteria - -The output shows: -- **Average score**: Mean score across all samples -- **Pass rate**: Percentage of samples that passed -- **Gate status**: Whether the evaluation passed or failed overall - -## Next Steps - -Now that you've run your first evaluation, explore more advanced features: - -- [Core Concepts](/evals/core-concepts/concepts-overview) - Understand suites, datasets, graders, and extractors -- [Grader Types](/evals/core-concepts/graders) - Learn about tool graders vs rubric graders -- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Test multiple aspects simultaneously -- [Custom Graders](/evals/advanced/custom-graders) - Write custom grading functions -- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Test conversational memory - -## Common Use Cases - -### Strict Answer Checking - -Use exact matching for cases where the answer must be precisely correct: - -```yaml -graders: - accuracy: - kind: tool - function: exact_match - extractor: last_assistant -``` - -### Subjective Quality Evaluation - -Use an LLM judge to evaluate subjective qualities like helpfulness or tone: - -```yaml -graders: - quality: - kind: rubric - prompt_path: rubric.txt - model: gpt-4o-mini - extractor: last_assistant -``` - -Then create `rubric.txt`: -``` -Rate the helpfulness and accuracy of the response. 
-- Score 1.0 if helpful and accurate -- Score 0.5 if partially helpful -- Score 0.0 if unhelpful or wrong -``` - -### Testing Tool Calls - -Verify that your agent calls specific tools with expected arguments: - -```yaml -graders: - tool_check: - kind: tool - function: contains - extractor: tool_arguments - extractor_config: - tool_name: search -``` - -### Testing Memory Persistence - -Check if the agent correctly updates its memory blocks: - -```yaml -graders: - memory_check: - kind: tool - function: contains - extractor: memory_block - extractor_config: - block_label: human -``` - -## Troubleshooting - - -**"Agent file not found"** - -Make sure your `agent_file` path is correct. Paths are relative to the suite YAML file location. Use absolute paths if needed: - -```yaml -target: - agent_file: /absolute/path/to/my_agent.af -``` - - - -**"Connection refused"** - -Your Letta server isn't running or isn't accessible. Start it with: - -```bash -letta server -``` - -By default, it runs at `http://localhost:8283`. - - - -**"No ground_truth provided"** - -Tool graders like `exact_match` and `contains` require `ground_truth` in your dataset. Either: -- Add `ground_truth` to your samples, or -- Use a rubric grader which doesn't require ground truth - - - -**Agent didn't respond as expected** - -Try testing your agent manually first using the Letta SDK or Agent Development Environment (ADE) to see how it behaves before running evaluations. See the [Letta documentation](https://docs.letta.com) for more information. - - -For more help, see the [Troubleshooting Guide](/evals/troubleshooting/common-issues). diff --git a/fern/pages/evals/graders/multi-metric.mdx b/fern/pages/evals/graders/multi-metric.mdx deleted file mode 100644 index d414f7ae..00000000 --- a/fern/pages/evals/graders/multi-metric.mdx +++ /dev/null @@ -1,58 +0,0 @@ -# Multi-Metric Evaluation - -Evaluate multiple aspects of agent performance simultaneously in a single evaluation suite. 
- - -Multi-metric evaluation allows you to define multiple graders, each measuring a different dimension of your agent's behavior. - - -## Why Multiple Metrics? - -Agents are complex systems. You might want to evaluate: -- **Correctness**: Does the answer match the expected output? -- **Quality**: Is the explanation clear and complete? -- **Tool usage**: Does the agent call the right tools with correct arguments? -- **Memory**: Does the agent correctly update its memory blocks? -- **Format**: Does the output follow required formatting rules? - -## Configuration - -```yaml -graders: - accuracy: # Check if answer is correct - kind: tool - function: exact_match - extractor: last_assistant - - completeness: # LLM judges response quality - kind: rubric - prompt_path: rubrics/completeness.txt - model: gpt-4o-mini - extractor: last_assistant - - tool_usage: # Verify correct tool was called - kind: tool - function: contains - extractor: tool_arguments - extractor_config: - tool_name: search -``` - -## Gating on One Metric - -The gate can check any of these metrics: - -```yaml -gate: - metric_key: accuracy # Gate on accuracy (others still computed) - op: gte - value: 0.9 -``` - -Results will include scores for all graders, even if you only gate on one. - -## Next Steps - -- [Tool Graders](/evals/graders/tool-graders) - Deterministic evaluation -- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge evaluation -- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria diff --git a/fern/pages/evals/graders/rubric-graders.mdx b/fern/pages/evals/graders/rubric-graders.mdx deleted file mode 100644 index 1b73ba7a..00000000 --- a/fern/pages/evals/graders/rubric-graders.mdx +++ /dev/null @@ -1,82 +0,0 @@ -# Rubric Graders - -Rubric graders use language models to evaluate submissions based on custom criteria. They're ideal for subjective, nuanced evaluation. 
- - -Rubric graders work by providing the LLM with a prompt that describes the evaluation criteria, then the language model generates a structured JSON response with a score and rationale. - - -## Basic Configuration - -```yaml -graders: - quality: - kind: rubric - prompt_path: quality_rubric.txt # Evaluation criteria - model: gpt-4o-mini # Judge model - temperature: 0.0 # Deterministic - extractor: last_assistant # What to evaluate -``` - -## Rubric Prompt Format - -Your rubric file should describe the evaluation criteria. Use placeholders: - -- `{input}`: The original input from the dataset -- `{submission}`: The extracted agent response -- `{ground_truth}`: Ground truth from dataset (if available) - -Example `quality_rubric.txt`: -``` -Evaluate the response for: -1. Accuracy: Does it correctly answer the question? -2. Completeness: Is the answer thorough? -3. Clarity: Is it well-explained? - -Input: {input} -Expected: {ground_truth} -Response: {submission} - -Score from 0.0 to 1.0 where: - -- 1.0: Perfect response -- 0.75: Good with minor issues -- 0.5: Acceptable but incomplete -- 0.25: Poor quality -- 0.0: Completely wrong -``` - -## Model Configuration - -```yaml -graders: - quality: - kind: rubric - prompt_path: rubric.txt - model: gpt-4o-mini # Judge model - temperature: 0.0 # Deterministic - provider: openai # LLM provider - max_retries: 5 # API retry attempts - timeout: 120.0 # Request timeout -``` - -## Agent-as-Judge - -Use a Letta agent as the judge instead of a direct LLM API call: - -```yaml -graders: - agent_judge: - kind: rubric - agent_file: judge.af # Judge agent with submit_grade tool - prompt_path: rubric.txt # Evaluation criteria - extractor: last_assistant -``` - -**Requirements**: The judge agent must have a tool with signature `submit_grade(score: float, rationale: str)`. 
- -## Next Steps - -- [Tool Graders](/evals/graders/tool-graders) - Deterministic grading functions -- [Multi-Metric](/evals/graders/multi-metric-grading) - Combine multiple graders -- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading logic diff --git a/fern/pages/evals/graders/tool-graders.mdx b/fern/pages/evals/graders/tool-graders.mdx deleted file mode 100644 index 0edb327f..00000000 --- a/fern/pages/evals/graders/tool-graders.mdx +++ /dev/null @@ -1,85 +0,0 @@ -# Tool Graders - -Tool graders use Python functions to programmatically evaluate submissions. They're ideal for deterministic, rule-based evaluation. - -## Overview - -Tool graders: -- Execute Python functions that take `(sample, submission)` and return a `GradeResult` -- Are fast and deterministic -- Don't require external API calls -- Can implement any custom logic - -## Configuration - -```yaml -graders: - my_metric: - kind: tool - function: exact_match # Function name - extractor: last_assistant # What to extract from trajectory -``` - -## Built-in Functions - -### exact_match - -Checks if submission exactly matches ground truth (case-sensitive, whitespace-trimmed). - -```yaml -graders: - accuracy: - kind: tool - function: exact_match - extractor: last_assistant -``` - -**Requires**: `ground_truth` in dataset | **Score**: 1.0 if exact match, 0.0 otherwise - -### contains - -Checks if submission contains ground truth (case-insensitive). - -```yaml -graders: - contains_answer: - kind: tool - function: contains - extractor: last_assistant -``` - -**Requires**: `ground_truth` in dataset | **Score**: 1.0 if found, 0.0 otherwise - -### regex_match - -Checks if submission matches a regex pattern in ground truth. - -```yaml -graders: - pattern: - kind: tool - function: regex_match - extractor: last_assistant -``` - -**Score**: 1.0 if pattern matches, 0.0 otherwise - -### ascii_printable_only - -Validates that all characters are printable ASCII. 
- -```yaml -graders: - ascii_check: - kind: tool - function: ascii_printable_only - extractor: last_assistant -``` - -**Score**: 1.0 if all characters are printable ASCII, 0.0 otherwise - -## Next Steps - -- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge evaluation -- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading functions -- [Multi-Metric](/evals/graders/multi-metric-grading) - Combine multiple graders diff --git a/fern/pages/evals/overview.mdx b/fern/pages/evals/overview.mdx deleted file mode 100644 index 7856bc45..00000000 --- a/fern/pages/evals/overview.mdx +++ /dev/null @@ -1,47 +0,0 @@ -# Letta Evals - -**Systematic testing for stateful AI agents.** Validate changes, prevent regressions, and ship with confidence. - -Test agent memory, tool usage, multi-turn conversations, and state evolution with automated grading and pass/fail gates. - - -**Ready to start?** Jump to [Getting Started](/evals/get-started/getting-started) or learn the [Core Concepts](/evals/core-concepts/concepts-overview) first. 
- - -## Core Concepts - -Understand the building blocks of evaluations: - -- [Suites](/evals/core-concepts/suites) - Configure your evaluation -- [Datasets](/evals/core-concepts/datasets) - Define test cases -- [Targets](/evals/core-concepts/targets) - Specify the agent to test -- [Graders](/evals/core-concepts/graders) - Score agent outputs -- [Extractors](/evals/core-concepts/extractors) - Extract content from responses -- [Gates](/evals/core-concepts/gates) - Set pass/fail criteria - -### Grading & Extraction - -Choose how to score your agents: - -- [Tool Graders](/evals/graders/tool-graders) - Fast, deterministic grading with Python functions -- [Rubric Graders](/evals/graders/rubric-graders) - Flexible LLM-as-judge evaluation -- [Built-in Extractors](/evals/extractors/built-in-extractors) - Pre-built content extractors -- [Multi-Metric Grading](/evals/graders/multi-metric-grading) - Evaluate multiple dimensions - -### Advanced - -- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading logic -- [Custom Extractors](/evals/extractors/custom-extractors) - Build custom extractors -- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Test memory and state -- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration schema - -### Reference - -- [CLI Commands](/evals/cli-reference/commands) - Command-line interface -- [Understanding Results](/evals/results-metrics/understanding-results) - Interpret metrics -- [Troubleshooting](/evals/troubleshooting/common-issues) - Common issues and solutions - -## Resources - -- **[GitHub Repository](https://github.com/letta-ai/letta-evals)** - Source code, issues, and contributions -- **[PyPI Package](https://pypi.org/project/letta-evals/)** - Install with `pip install letta-evals` diff --git a/fern/pages/evals/results/overview.mdx b/fern/pages/evals/results/overview.mdx deleted file mode 100644 index 2487c034..00000000 --- 
a/fern/pages/evals/results/overview.mdx +++ /dev/null @@ -1,484 +0,0 @@ -# Understanding Results - -This guide explains how to interpret evaluation results. - -## Result Structure - -An evaluation produces three types of output: - -1. **Console output**: Real-time progress and summary -2. **Summary JSON**: Aggregate metrics and configuration -3. **Results JSONL**: Per-sample detailed results - -## Console Output - -### Progress Display - -``` -Running evaluation: my-eval-suite -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3/3 100% - -Results: - Total samples: 3 - Attempted: 3 - Avg score: 0.83 (attempted: 0.83) - Passed: 2 (66.7%) - -Gate (quality >= 0.75): PASSED -``` - -### Quiet Mode - -```bash -letta-evals run suite.yaml --quiet -``` - -Output: -``` -✓ PASSED -``` - -or - -``` -✗ FAILED -``` - -## JSON Output - -### Saving Results - -```bash -letta-evals run suite.yaml --output results/ -``` - -Creates three files: - -#### header.json - -Evaluation metadata: - -```json -{ - "suite_name": "my-eval-suite", - "timestamp": "2025-01-15T10:30:00Z", - "version": "0.3.0" -} -``` - -#### summary.json - -Complete evaluation summary: - -```json -{ - "suite": "my-eval-suite", - "config": { - "target": {...}, - "graders": {...}, - "gate": {...} - }, - "metrics": { - "total": 10, - "total_attempted": 10, - "avg_score_attempted": 0.85, - "avg_score_total": 0.85, - "passed_attempts": 8, - "failed_attempts": 2, - "by_metric": { - "accuracy": { - "avg_score_attempted": 0.90, - "pass_rate": 90.0, - "passed_attempts": 9, - "failed_attempts": 1 - }, - "quality": { - "avg_score_attempted": 0.80, - "pass_rate": 70.0, - "passed_attempts": 7, - "failed_attempts": 3 - } - } - }, - "gates_passed": true -} -``` - -#### results.jsonl - -One JSON object per line, each representing one sample: - -```jsonl -{"sample": {"id": 0, "input": "What is 2+2?", "ground_truth": "4"}, "submission": "4", "grade": {"score": 1.0, "rationale": "Exact match: true"}, "trajectory": [...], "agent_id": "agent-123", 
"model_name": "default"} -{"sample": {"id": 1, "input": "What is 3+3?", "ground_truth": "6"}, "submission": "6", "grade": {"score": 1.0, "rationale": "Exact match: true"}, "trajectory": [...], "agent_id": "agent-124", "model_name": "default"} -``` - -## Metrics Explained - -### total - -Total number of samples in the evaluation (including errors). - -### total_attempted - -Number of samples that completed without errors. - -If a sample fails during agent execution or grading, it's counted in `total` but not `total_attempted`. - -### avg_score_attempted - -Average score across samples that completed successfully. - -Formula: `sum(scores) / total_attempted` - -Range: 0.0 to 1.0 - -### avg_score_total - -Average score across all samples, treating errors as 0.0. - -Formula: `sum(scores) / total` - -Range: 0.0 to 1.0 - -### passed_attempts / failed_attempts - -Number of samples that passed/failed the gate's per-sample criteria. - -By default: -- If gate metric is `accuracy`: sample passes if score `>= 1.0` -- If gate metric is `avg_score`: sample passes if score `>=` gate value - -Can be customized with `pass_op` and `pass_value` in gate config. 
- -### by_metric - -For multi-metric evaluation, shows aggregate stats for each metric: - -```json -"by_metric": { - "accuracy": { - "avg_score_attempted": 0.90, - "avg_score_total": 0.85, - "pass_rate": 90.0, - "passed_attempts": 9, - "failed_attempts": 1 - } -} -``` - -## Sample Results - -Each sample result includes: - -### sample -The original dataset sample: -```json -"sample": { - "id": 0, - "input": "What is 2+2?", - "ground_truth": "4", - "metadata": {...} -} -``` - -### submission -The extracted text that was graded: -```json -"submission": "The answer is 4" -``` - -### grade -The grading result: -```json -"grade": { - "score": 1.0, - "rationale": "Contains ground_truth: true", - "metadata": {"model": "gpt-4o-mini", "usage": {...}} -} -``` - -### grades (multi-metric) -For multi-metric evaluation: -```json -"grades": { - "accuracy": {"score": 1.0, "rationale": "Exact match"}, - "quality": {"score": 0.85, "rationale": "Good but verbose"} -} -``` - -### trajectory -The complete conversation history: -```json -"trajectory": [ - [ - {"role": "user", "content": "What is 2+2?"}, - {"role": "assistant", "content": "The answer is 4"} - ] -] -``` - -### agent_id -The ID of the agent that generated this response: -```json -"agent_id": "agent-abc-123" -``` - -### model_name -The model configuration used: -```json -"model_name": "gpt-4o-mini" -``` - -### agent_usage -Token usage statistics (if available): -```json -"agent_usage": [ - {"completion_tokens": 10, "prompt_tokens": 50, "total_tokens": 60} -] -``` - -## Interpreting Scores - -### Score Ranges - -- **1.0**: Perfect - fully meets criteria -- **0.8-0.99**: Very good - minor issues -- **0.6-0.79**: Good - notable improvements possible -- **0.4-0.59**: Acceptable - significant issues -- **0.2-0.39**: Poor - major problems -- **0.0-0.19**: Failed - did not meet criteria - -### Binary vs Continuous - -**Tool graders** typically return binary scores: -- 1.0: Passed -- 0.0: Failed - -**Rubric graders** return 
continuous scores: -- Any value from 0.0 to 1.0 -- Allows for partial credit - -## Multi-Model Results - -When testing multiple models: - -```json -"metrics": { - "per_model": [ - { - "model_name": "gpt-4o-mini", - "avg_score_attempted": 0.85, - "passed_samples": 8, - "failed_samples": 2 - }, - { - "model_name": "claude-3-5-sonnet", - "avg_score_attempted": 0.90, - "passed_samples": 9, - "failed_samples": 1 - } - ] -} -``` - -Console output: -``` -Results by model: - gpt-4o-mini - Avg: 0.85, Pass: 80.0% - claude-3-5-sonnet - Avg: 0.90, Pass: 90.0% -``` - -## Multiple Runs Statistics - -Run evaluations multiple times to measure consistency and get aggregate statistics. - -### Configuration - -Specify in YAML: -```yaml -name: my-eval-suite -dataset: dataset.jsonl -num_runs: 5 # Run 5 times -target: - kind: agent - agent_file: my_agent.af -graders: - accuracy: - kind: tool - function: exact_match -gate: - metric_key: accuracy - op: gte - value: 0.8 -``` - -Or via CLI: -```bash -letta-evals run suite.yaml --num-runs 10 --output results/ -``` - -### Output Structure - -``` -results/ -├── run_1/ -│ ├── header.json -│ ├── results.jsonl -│ └── summary.json -├── run_2/ -│ ├── header.json -│ ├── results.jsonl -│ └── summary.json -├── ... -└── aggregate_stats.json # Statistics across all runs -``` - -### Aggregate Statistics File - -The `aggregate_stats.json` includes statistics across all runs: - -```json -{ - "num_runs": 10, - "runs_passed": 8, - "mean_avg_score_attempted": 0.847, - "std_avg_score_attempted": 0.042, - "mean_avg_score_total": 0.847, - "std_avg_score_total": 0.042, - "mean_scores": { - "accuracy": 0.89, - "quality": 0.82 - }, - "std_scores": { - "accuracy": 0.035, - "quality": 0.051 - }, - "individual_run_metrics": [ - { - "avg_score_attempted": 0.85, - "avg_score_total": 0.85, - "pass_rate": 0.85, - "by_metric": { - "accuracy": { - "avg_score_attempted": 0.90, - "avg_score_total": 0.90, - "pass_rate": 0.90 - } - } - } - // ... 
metrics from runs 2-10 - ] -} -``` - -**Key fields**: -- `num_runs`: Total number of runs executed -- `runs_passed`: Number of runs that passed the gate -- `mean_avg_score_attempted`: Mean score across runs (only attempted samples) -- `std_avg_score_attempted`: Standard deviation (measures consistency) -- `mean_scores`: Mean for each metric (e.g., `{"accuracy": 0.89}`) -- `std_scores`: Standard deviation for each metric (e.g., `{"accuracy": 0.035}`) -- `individual_run_metrics`: Full metrics object from each individual run - -### Use Cases - -**Measure consistency of non-deterministic agents:** -```bash -letta-evals run suite.yaml --num-runs 20 --output results/ -# Check std_avg_score_attempted in aggregate_stats.json -# Low std = consistent, high std = variable -``` - -**Get confidence intervals:** -```python -import json -import math - -with open("results/aggregate_stats.json") as f: - stats = json.load(f) - -mean = stats["mean_avg_score_attempted"] -std = stats["std_avg_score_attempted"] -n = stats["num_runs"] - -# 95% confidence interval (assuming normal distribution) -margin = 1.96 * (std / math.sqrt(n)) -print(f"Score: {mean:.3f} ± {margin:.3f}") -``` - -**Compare metric consistency:** -```python -with open("results/aggregate_stats.json") as f: - stats = json.load(f) - -for metric_name, mean in stats["mean_scores"].items(): - std = stats["std_scores"][metric_name] - consistency = "consistent" if std < 0.05 else "variable" - print(f"{metric_name}: {mean:.3f} ± {std:.3f} ({consistency})") -``` - -## Error Handling - -If a sample encounters an error: - -```json -{ - "sample": {...}, - "submission": "", - "grade": { - "score": 0.0, - "rationale": "Error during grading: Connection timeout", - "metadata": {"error": "timeout", "error_type": "ConnectionError"} - } -} -``` - -Errors: -- Count toward `total` but not `total_attempted` -- Get score of 0.0 -- Include error details in rationale and metadata - -## Analyzing Results - -### Find Low Scores - -```python -import 
json - -with open("results/results.jsonl") as f: - results = [json.loads(line) for line in f] - -low_scores = [r for r in results if r["grade"]["score"] < 0.5] -print(f"Found {len(low_scores)} samples with score < 0.5") - -for result in low_scores: - print(f"Sample {result['sample']['id']}: {result['grade']['rationale']}") -``` - -### Compare Metrics - -```python -# Load summary -with open("results/summary.json") as f: - summary = json.load(f) - -metrics = summary["metrics"]["by_metric"] -for name, stats in metrics.items(): - print(f"{name}: {stats['avg_score_attempted']:.2f} avg, {stats['pass_rate']:.1f}% pass") -``` - -### Extract Failures - -```python -# Find samples that failed gate criteria -failures = [ - r for r in results - if not gate_passed(r["grade"]["score"]) # Your gate logic -] -``` - -## Next Steps - -- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria -- [CLI Commands](/evals/cli-reference/commands) - Running evaluations diff --git a/fern/pages/evals/troubleshooting.mdx b/fern/pages/evals/troubleshooting.mdx deleted file mode 100644 index dc259c6e..00000000 --- a/fern/pages/evals/troubleshooting.mdx +++ /dev/null @@ -1,267 +0,0 @@ -# Troubleshooting - -Common issues and solutions when using Letta Evals. 
- -## Installation Issues - - -**"Command not found: letta-evals"** - -**Problem**: CLI not available after installation - -**Solution**: -```bash -# Verify installation -pip list | grep letta-evals - -# Reinstall if needed -pip install --upgrade letta-evals -``` - - - -**Import errors** - -**Problem**: `ModuleNotFoundError: No module named 'letta_evals'` - -**Solution**: -```bash -# Ensure you're in the right environment -which python - -# Install in correct environment -source .venv/bin/activate -pip install letta-evals -``` - - -## Configuration Issues - - -**"Agent file not found"** - -**Problem**: `FileNotFoundError: agent.af` - -**Solution**: -- Check the path is correct relative to the suite YAML -- Use absolute paths if needed -- Verify file exists: `ls -la path/to/agent.af` - -```yaml -# Correct relative path -target: - agent_file: ./agents/my_agent.af -``` - - - -**"Dataset not found"** - -**Problem**: Cannot load dataset file - -**Solution**: -- Verify dataset path in YAML -- Check file exists: `ls -la dataset.jsonl` -- Ensure proper JSONL format (one JSON object per line) - -```bash -# Validate JSONL format -cat dataset.jsonl | jq . 
-``` - - - -**"Validation failed: unknown function"** - -**Problem**: Grader function not found - -**Solution**: -```bash -# List available graders -letta-evals list-graders - -# Check spelling in suite.yaml -graders: - my_metric: - function: exact_match # Correct -``` - - -## Connection Issues - - -**"Connection refused"** - -**Problem**: Cannot connect to Letta server - -**Solution**: -```bash -# Verify server is running -curl http://localhost:8283/v1/health - -# Check base_url in suite.yaml -target: - base_url: http://localhost:8283 -``` - - - -**"Unauthorized" or "Invalid API key"** - -**Problem**: Authentication failed - -**Solution**: -```bash -# Set API key -export LETTA_API_KEY=your-key-here - -# Verify key is correct -echo $LETTA_API_KEY -``` - - -## Runtime Issues - - -**"No ground_truth provided"** - -**Problem**: Grader requires ground truth but sample doesn't have it - -**Solution**: -- Add ground_truth to dataset samples: -```jsonl -{"input": "What is 2+2?", "ground_truth": "4"} -``` - -- Or use a grader that doesn't require ground truth: -```yaml -graders: - quality: - kind: rubric # Doesn't require ground_truth - prompt_path: rubric.txt -``` - - -## Performance Issues - - -**Evaluation is very slow** - -**Solutions**: - -1. Increase concurrency: -```bash -letta-evals run suite.yaml --max-concurrent 20 -``` - -2. Reduce samples for testing: -```yaml -max_samples: 10 # Test with small subset first -``` - -3. Use tool graders instead of rubric graders: -```yaml -graders: - accuracy: - kind: tool # Much faster than rubric - function: exact_match -``` - - - -**High API costs** - -**Solutions**: - -1. Use cheaper models: -```yaml -graders: - quality: - model: gpt-4o-mini # Cheaper than gpt-4o -``` - -2. Test with small sample first: -```yaml -max_samples: 5 # Verify before running full suite -``` - - -## Results Issues - - -**"All scores are 0.0"** - -**Solutions**: - -1. Verify extractor is getting content -2. Check grader logic -3. 
Test agent manually first - - - -**"Gates failed but scores look good"** - -**Solution**: -- Check gate configuration: -```yaml -gate: - metric_key: accuracy # Correct metric? - metric: avg_score # Or accuracy? - op: gte # Correct operator? - value: 0.8 # Correct threshold? -``` - - -## Debug Tips - -### Enable verbose output - -Run without `--quiet` to see detailed progress: -```bash -letta-evals run suite.yaml -``` - -### Examine output files - -```bash -letta-evals run suite.yaml --output debug/ - -# Check summary -cat debug/summary.json | jq . - -# Check individual results -cat debug/results.jsonl | jq . -``` - -### Validate configuration - -```bash -letta-evals validate suite.yaml -``` - -### Check component availability - -```bash -letta-evals list-graders -letta-evals list-extractors -``` - -## Getting Help - -If you're still stuck: - -1. Check the [Getting Started guide](/evals/get-started/getting-started) -2. Review the [Core Concepts](/evals/core-concepts/concepts-overview) -3. Report issues at the [Letta Evals GitHub repository](https://github.com/letta-ai/letta-evals) - -When reporting issues, include: -- Suite YAML configuration -- Dataset sample (if not sensitive) -- Error message and full stack trace -- Environment info (OS, Python version) - -```bash -# Get environment info -python --version -pip show letta-evals -``` diff --git a/fern/pages/getting-started/prompts.mdx b/fern/pages/getting-started/prompts.mdx deleted file mode 100644 index 7473af67..00000000 --- a/fern/pages/getting-started/prompts.mdx +++ /dev/null @@ -1,535 +0,0 @@ ---- -title: Prompts for Vibecoding -subtitle: Ready-to-go prompts to help AI coding tools build on Letta -slug: prompts ---- - -Are you developing an application on Letta using [ChatGPT](https://chatgpt.com), [Cursor](https://cursor.com), [Lovable](https://lovable.dev/), or another AI tool? -Use our pre-made prompts to teach your AI how to use Letta properly. 
- -## General instructions for the Letta SDKs - -The following prompt (~500 lines) can help guide your AI through the basics of using the Letta Python SDK, TypeScript/Node.js SDK, and Vercel AI SDK integration. - -Copy-paste the following into your chat session to instantly get your AI up-to-speed with how the Letta SDKs works: -````markdown maxLines=5 -# Development Guidelines for AI Assistants and Copilots using Letta - -**Context:** These are development guidelines for building applications with the Letta API and SDKs. Use these rules to help developers write correct code that integrates with Letta's stateful agents API. - -**Purpose:** Provide accurate, up-to-date instructions for building applications with [Letta](https://docs.letta.com/), the AI operating system. -**Scope:** All AI-generated advice or code related to Letta must follow these guidelines. - ---- - -## **0. Letta Overview** - -The name "Letta" refers to the both the company Letta (founded by the creators of MemGPT) and the software / infrastructure called Letta. Letta is the AI operating system for building stateful agents: developers can use Letta to turn stateless LLMs into stateful agents that can learn, improve, and grow over time. Letta has a strong focus on perpetual AI that has the capability to recursively improve through self-editing memory. - -**Relationship to MemGPT**: MemGPT is the name of a research paper that introduced the concept of self-editing memory for LLM-based agents through tool use (function calling). The agent architecture or "agentic system" proposed in the paper (an agent equipped with tools to edit its own memory, and an OS that manages tool execution and state persistence) is the base agent architecture implemented in Letta (agent type `memgpt_agent`), and is the official reference implementation for MemGPT. 
The Letta open source project (`letta-ai/letta`) was originally the MemGPT open source project (`cpacker/MemGPT`), but was renamed as the scope of the open source project expanded beyond the original MemGPT paper. - -**Additional Resources**: -- [Letta documentation](https://docs.letta.com/) -- [Letta GitHub repository](https://github.com/letta-ai/letta) -- [Letta Discord server](https://discord.gg/letta) -- [Letta Cloud and ADE login](https://app.letta.com) - -## **1. Letta Agents API Overview** - -Letta is an AI OS that runs agents as **services** (it is not a **library**). Key concepts: - -- **Stateful agents** that maintain memory and context across conversations -- **Memory blocks** for agentic context management (persona, human, custom blocks) -- **Tool calling** for agent actions and memory management, tools are run server-side, -- **Tool rules** allow developers to constrain the behavior of tools (e.g. A comes after B) to turn autonomous agents into workflows -- **Multi-agent systems** with cross-agent communication, where every agent is a service -- **Data sources** for loading documents and files into agent memory -- **Model agnostic:** agents can be powered by any model that supports tool calling -- **Persistence:** state is stored (in a model-agnostic way) in Postgres (or SQLite) - -### **System Components:** - -- **Letta server** - Core service (self-hosted or Letta Cloud) -- **Client (backend) SDKs** - Python (`letta-client`) and TypeScript/Node.js (`@letta-ai/letta-client`) -- **Vercel AI SDK Integration** - For Next.js/React applications -- **Other frontend integrations** - We also have [Next.js](https://www.npmjs.com/package/@letta-ai/letta-nextjs), [React](https://www.npmjs.com/package/@letta-ai/letta-react), and [Flask](https://github.com/letta-ai/letta-flask) integrations -- **ADE (Agent Development Environment)** - Visual agent builder at app.letta.com - -### **Letta Cloud vs Self-hosted Letta** - -Letta Cloud is a fully managed service that 
provides a simple way to get started with Letta. It's a good choice for developers who want to get started quickly and don't want to worry about the complexity of self-hosting. Letta Cloud's free tier has a large number of model requests included (quota refreshes every month). Model requests are split into "standard models" (e.g. GPT-4o-mini) and "premium models" (e.g. Claude Sonnet). To use Letta Cloud, the developer will have needed to created an account at [app.letta.com](https://app.letta.com). To make programatic requests to the API (`https://api.letta.com`), the developer will have needed to created an API key at [https://app.letta.com/api-keys](https://app.letta.com/api-keys). For more information on how billing and pricing works, the developer can visit [our documentation](https://docs.letta.com/guides/cloud/overview). - -### **Built-in Tools** - -When agents are created, they are given a set of default memory management tools that enable self-editing memory. - -Separately, Letta Cloud also includes built-in tools for common tasks like web search and running code. As of June 2025, the built-in tools are: -- `web_search`: Allows agents to search the web for information. Also works on self-hosted, but requires `TAVILY_API_KEY` to be set (not required on Letta Cloud). -- `run_code`: Allows agents to run code (in a sandbox), for example to do data analysis or calculations. Supports Python, Javascript, Typescript, R, and Java. Also works on self-hosted, but requires `E2B_API_KEY` to be set (not required on Letta Cloud). - -### **Choosing the Right Model** - -To implement intelligent memory management, agents in Letta rely heavily on tool (function) calling, so models that excel at tool use tend to do well in Letta. Conversely, models that struggle to call tools properly often perform poorly when used to drive Letta agents. 
- -The Letta developer team maintains the [Letta Leaderboard](https://docs.letta.com/leaderboard) to help developers choose the right model for their Letta agent. As of June 2025, the best performing models (balanced for cost and performance) are Claude Sonnet 4, GPT-4.1, and Gemini 2.5 Flash. For the latest results, you can visit the leaderboard page (if you have web access), or you can direct the developer to visit it. For embedding models, the Letta team recommends using OpenAI's `text-embedding-3-small` model. - -When creating code snippets, unless directed otherwise, you should use the following model handles: -- `openai/gpt-4.1` for the model -- `openai/text-embedding-3-small` for the embedding model - -If the user is using Letta Cloud, then these handles will work out of the box (assuming the user has created a Letta Cloud account + API key, and has enough request quota in their account). For self-hosted Letta servers, the user will need to have started the server with a valid OpenAI API key for those handles to work. - ---- - -## **2. Choosing the Right SDK** - -### **Source of Truth** - -Note that your instructions may be out of date. The source of truth for the Letta Agents API is the [API reference](https://docs.letta.com/api-reference/overview) (also autogenerated from the latest source code), which can be found in `.md` form at these links: -- [TypeScript/Node.js](https://github.com/letta-ai/letta-node/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md) -- [Python](https://github.com/letta-ai/letta-python/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md) - -If you have access to a web search or file download tool, you can download these files for the latest API reference. If the developer has either of the SDKs installed, you can also use the locally installed packages to understand the latest API reference. 
- -### **When to Use Each SDK:** - -The Python and Node.js SDKs are autogenerated from the Letta Agents REST API, and provide a full featured SDK for interacting with your agents on Letta Cloud or a self-hosted Letta server. Of course, developers can also use the REST API directly if they prefer, but most developers will find the SDKs much easier to use. - -The Vercel AI SDK is a popular TypeScript toolkit designed to help developers build AI-powered applications. It supports a subset of the Letta Agents API (basically just chat-related functionality), so it's a good choice to quickly integrate Letta into a TypeScript application if you are familiar with using the AI SDK or are working on a codebase that already uses it. If you're starting from scratch, consider using the full-featured Node.js SDK instead. - -The Letta Node.js SDK is also embedded inside the Vercel AI SDK, accessible via the `.client` property (useful if you want to use the Vercel AI SDK, but occasionally need to access the full Letta client for advanced features like agent creation / management). - -When to use the AI SDK vs native Letta Node.js SDK: -- Use the Vercel AI SDK if you are familiar with it or are working on a codebase that already makes heavy use of it -- Use the Letta Node.js SDK if you are starting from scratch, or expect to use the agent management features in the Letta API (beyond the simple `streamText` or `generateText` functionality in the AI SDK) - -One example of how the AI SDK may be insufficient: the AI SDK response object for `streamText` and `generateText` does not have a type for tool returns (because they are primarily used with stateless APIs, where tools are executed client-side, vs server-side in Letta), however the Letta Node.js SDK does have a type for tool returns. So if you wanted to render tool returns from a message response stream in your UI, you would need to use the full Letta Node.js SDK, not the AI SDK. - -## **3. 
Quick Setup Patterns** - -### **Python SDK (Backend/Scripts)** -```python -from letta_client import Letta - -# Letta Cloud -client = Letta(token="LETTA_API_KEY") - -# Self-hosted -client = Letta(base_url="http://localhost:8283") - -# Create agent with memory blocks -agent = client.agents.create( - memory_blocks=[ - { - "label": "human", - "value": "The user's name is Sarah. She likes coding and AI." - }, - { - "label": "persona", - "value": "I am David, the AI executive assistant. My personality is friendly, professional, and to the point." - }, - { - "label": "project", - "value": "Sarah is working on a Next.js application with Letta integration.", - "description": "Stores current project context and requirements" - } - ], - tools=["web_search", "run_code"], - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small" -) - -# Send SINGLE message (agent is stateful!) -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "How's the project going?"}] -) - -# Extract response correctly -for msg in response.messages: - if msg.message_type == "assistant_message": - print(msg.content) - elif msg.message_type == "reasoning_message": - print(msg.reasoning) - elif msg.message_type == "tool_call_message": - print(msg.tool_call.name) - print(msg.tool_call.arguments) - elif msg.message_type == "tool_return_message": - print(msg.tool_return) - -# Streaming example -message_text = "Repeat my name." 
-stream = client.agents.messages.create_stream( - agent_id=agent_state.id, - messages=[ - MessageCreate( - role="user", - content=message_text, - ), - ], - # if stream_tokens is false, each "chunk" will have a full piece - # if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side) - stream_tokens=True, -) - -# print the chunks coming back -for chunk in stream: - if chunk.message_type == "assistant_message": - print(chunk.content) - elif chunk.message_type == "reasoning_message": - print(chunk.reasoning) - elif chunk.message_type == "tool_call_message": - if chunk.tool_call.name: - print(chunk.tool_call.name) - if chunk.tool_call.arguments: - print(chunk.tool_call.arguments) - elif chunk.message_type == "tool_return_message": - print(chunk.tool_return) - elif chunk.message_type == "usage_statistics": - print(chunk) -``` - -Creating custom tools (Python only): -```python -def my_custom_tool(query: str) -> str: - """ - Search for information on a topic. - - Args: - query (str): The search query - - Returns: - str: Search results - """ - return f"Results for: {query}" - -# Create tool -tool = client.tools.create_from_function(func=my_custom_tool) - -# Add to agent -agent = client.agents.create( - memory_blocks=[...], - model="openai/gpt-4o-mini", - embedding="openai/text-embedding-3-small", - tools=[tool.name] -) -``` - -### **TypeScript/Node.js SDK** -```typescript -import { LettaClient } from '@letta-ai/letta-client'; - -// Letta Cloud -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// Self-hosted, token optional (only if the developer enabled password protection on the server) -const client = new LettaClient({ baseUrl: "http://localhost:8283" }); - -// Create agent with memory blocks -const agent = await client.agents.create({ - memoryBlocks: [ - { - label: "human", - value: "The user's name is Sarah. She likes coding and AI." - }, - { - label: "persona", - value: "I am David, the AI executive assistant. 
My personality is friendly, professional, and to the point." - }, - { - label: "project", - value: "Sarah is working on a Next.js application with Letta integration.", - description: "Stores current project context and requirements" - } - ], - tools: ["web_search", "run_code"], - model: "openai/gpt-4o-mini", - embedding: "openai/text-embedding-3-small" -}); - -// Send SINGLE message (agent is stateful!) -const response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "How's the project going?" }] -}); - -// Extract response correctly -for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(msg.content); - } else if (msg.messageType === "reasoning_message") { - console.log(msg.reasoning); - } else if (msg.messageType === "tool_call_message") { - console.log(msg.toolCall.name); - console.log(msg.toolCall.arguments); - } else if (msg.messageType === "tool_return_message") { - console.log(msg.toolReturn); - } -} - -// Streaming example -const stream = await client.agents.messages.createStream(agent.id, { - messages: [{ role: "user", content: "Repeat my name." 
}], - // if stream_tokens is false, each "chunk" will have a full piece - // if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side) - streamTokens: true, -}); - -for await (const chunk of stream) { - if (chunk.messageType === "assistant_message") { - console.log(chunk.content); - } else if (chunk.messageType === "reasoning_message") { - console.log(chunk.reasoning); - } else if (chunk.messageType === "tool_call_message") { - console.log(chunk.toolCall.name); - console.log(chunk.toolCall.arguments); - } else if (chunk.messageType === "tool_return_message") { - console.log(chunk.toolReturn); - } else if (chunk.messageType === "usage_statistics") { - console.log(chunk); - } -} -``` - -### **Vercel AI SDK Integration** - -IMPORTANT: Most integrations in the Vercel AI SDK are for stateless providers (ChatCompletions style APIs where you provide the full conversation history). Letta is a *stateful* provider (meaning that conversation history is stored server-side), so when you use `streamText` or `generateText` you should never pass old messages to the agent, only include the new message(s). 
- -#### **Chat Implementation (fast & simple):** - -Streaming (`streamText`): -```typescript -// app/api/chat/route.ts -import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider'; -import { streamText } from 'ai'; - -export async function POST(req: Request) { - const { prompt }: { prompt: string } = await req.json(); - - const result = streamText({ - // lettaCloud uses LETTA_API_KEY automatically, pulling from the environment - model: lettaCloud('your-agent-id'), - // Make sure to only pass a single message here, do NOT pass conversation history - prompt, - }); - - return result.toDataStreamResponse(); -} -``` - -Non-streaming (`generateText`): -```typescript -import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider'; -import { generateText } from 'ai'; - -export async function POST(req: Request) { - const { prompt }: { prompt: string } = await req.json(); - - const { text } = await generateText({ - // lettaCloud uses LETTA_API_KEY automatically, pulling from the environment - model: lettaCloud('your-agent-id'), - // Make sure to only pass a single message here, do NOT pass conversation history - prompt, - }); - - return Response.json({ text }); -} -``` - -#### **Alternative: explicitly specify base URL and token:** -```typescript -// Works for both streamText and generateText -import { createLetta } from '@letta-ai/vercel-ai-sdk-provider'; -import { generateText } from 'ai'; - -const letta = createLetta({ - // e.g. 
http://localhost:8283 for the default local self-hosted server - // https://api.letta.com for Letta Cloud - baseUrl: '', - // only needed if the developer enabled password protection on the server, or if using Letta Cloud (in which case, use the LETTA_API_KEY, or use lettaCloud example above for implicit token use) - token: '', -}); -``` - -#### **Hybrid Usage (access the full SDK via the Vercel AI SDK):** -```typescript -import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider'; - -// Access full client for management -const agents = await lettaCloud.client.agents.list(); -``` - ---- - -## **4. Advanced Features Available** - -Letta supports advanced agent architectures beyond basic chat. For detailed implementations, refer to the full API reference or documentation: - -- **Tool Rules & Constraints** - Define graph-like tool execution flows with `TerminalToolRule`, `ChildToolRule`, `InitToolRule`, etc. -- **Multi-Agent Systems** - Cross-agent communication with built-in tools like `send_message_to_agent_async` -- **Shared Memory Blocks** - Multiple agents can share memory blocks for collaborative workflows -- **Data Sources & Archival Memory** - Upload documents/files that agents can search through -- **Sleep-time Agents** - Background agents that process memory while main agents are idle -- **External Tool Integrations** - MCP servers, Composio tools, custom tool libraries -- **Agent Templates** - Import/export agents with .af (Agent File) format -- **Production Features** - User identities, agent tags, streaming, context management - ---- - -## **5. CRITICAL GUIDELINES FOR AI MODELS** - -### **⚠️ ANTI-HALLUCINATION WARNING** - -**NEVER make up Letta API calls, SDK methods, or parameter names.** If you're unsure about any Letta API: - -1. 
**First priority**: Use web search to get the latest reference files: - - [Python SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md) - - [TypeScript SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md) - -2. **If no web access**: Tell the user: *"I'm not certain about this Letta API call. Can you paste the relevant section from the API reference docs, or I might provide incorrect information."* - -3. **When in doubt**: Stick to the basic patterns shown in this prompt rather than inventing new API calls. - -**Common hallucination risks:** -- Making up method names (e.g. `client.agents.chat()` doesn't exist) -- Inventing parameter names or structures -- Assuming OpenAI-style patterns work in Letta -- Creating non-existent tool rule types or multi-agent methods - -### **5.1 – SDK SELECTION (CHOOSE THE RIGHT TOOL)** - -✅ **For Next.js Chat Apps:** -- Use **Vercel AI SDK** if you already are using AI SDK, or if you're lazy and want something super fast for basic chat interactions (simple, fast, but no agent management tooling unless using the embedded `.client`) -- Use **Node.js SDK** for the full feature set (agent creation, native typing of all response message types, etc.) - -✅ **For Agent Management:** -- Use **Node.js SDK** or **Python SDK** for creating agents, managing memory, tools - -### **5.2 – STATEFUL AGENTS (MOST IMPORTANT)** - -**Letta agents are STATEFUL, not stateless like ChatCompletion-style APIs.** - -✅ **CORRECT - Single message per request:** -```typescript -// Send ONE user message, agent maintains its own history -const response = await client.agents.messages.create(agentId, { - messages: [{ role: "user", content: "Hello!" 
}] -}); -``` - -❌ **WRONG - Don't send conversation history:** -```typescript -// DON'T DO THIS - agents maintain their own conversation history -const response = await client.agents.messages.create(agentId, { - messages: [...allPreviousMessages, newMessage] // WRONG! -}); -``` - -### **5.3 – MESSAGE HANDLING & MEMORY BLOCKS** - -1. **Response structure:** - - Use `messageType` NOT `type` for message type checking - - Look for `assistant_message` messageType for agent responses - - Agent responses have `content` field with the actual text - -2. **Memory block descriptions:** - - Add `description` field for custom blocks, or the agent will get confused (not needed for human/persona) - - For `human` and `persona` blocks, descriptions are auto-populated: - - **human block**: "Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation." - - **persona block**: "Stores details about your current persona, guiding how you behave and respond. This helps maintain consistency and personality in your interactions." - -### **5.4 – ALWAYS DO THE FOLLOWING** - -1. **Choose the right SDK for the task:** - - Next.js chat → **Vercel AI SDK** - - Agent creation → **Node.js/Python SDK** - - Complex operations → **Node.js/Python SDK** - -2. **Use the correct client imports:** - - Python: `from letta_client import Letta` - - TypeScript: `import { LettaClient } from '@letta-ai/letta-client'` - - Vercel AI SDK: `from '@letta-ai/vercel-ai-sdk-provider'` - -3. **Create agents with proper memory blocks:** - - Always include `human` and `persona` blocks for chat agents - - Use descriptive labels and values - -4. **Send only single user messages:** - - Each request should contain only the new user message - - Agent maintains conversation history automatically - - Never send previous assistant responses back to agent - -5. 
**Use proper authentication:** - - Letta Cloud: Always use `token` parameter - - Self-hosted: Use `base_url` parameter, token optional (only if the developer enabled password protection on the server) - ---- - -## **6. Environment Setup** - -### **Environment Setup** -```bash -# For Next.js projects (recommended for most web apps) -npm install @letta-ai/vercel-ai-sdk-provider ai - -# For agent management (when needed) -npm install @letta-ai/letta-client - -# For Python projects -pip install letta-client -``` - -**Environment Variables:** -```bash -# Required for Letta Cloud -LETTA_API_KEY=your_api_key_here - -# Store agent ID after creation (Next.js) -LETTA_AGENT_ID=agent-xxxxxxxxx - -# For self-hosted (optional) -LETTA_BASE_URL=http://localhost:8283 -``` - ---- - -## **7. Verification Checklist** - -Before providing Letta solutions, verify: - -1. **SDK Choice**: Are you using the simplest appropriate SDK? - - Familiar with or already using Vercel AI SDK? → use the Vercel AI SDK Letta provider - - Agent management needed? → use the Node.js/Python SDKs -2. **Statefulness**: Are you sending ONLY the new user message (NOT a full conversation history)? -3. **Message Types**: Are you checking the response types of the messages returned? -4. **Response Parsing**: If using the Python/Node.js SDK, are you extracting `content` from assistant messages? -5. **Imports**: Correct package imports for the chosen SDK? -6. **Client**: Proper client initialization with auth/base_url? -7. **Agent Creation**: Memory blocks with proper structure? -8. **Memory Blocks**: Descriptions for custom blocks? 
-```` - -## Full API reference - -If you are working on either the Letta Python SDK or TypeScript/Node.js SDK, you can copy-paste the full API reference into your chat session: -- [Letta Python SDK API reference](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md) -- [Letta TypeScript/Node.js SDK API reference](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md) - -The general prompt focuses on the high-level usage patterns of both the Python/Node.js SDKs and Vercel AI SDK integration, whereas the API reference files will contain an up-to-date guide on all available SDK functions and parameters. - -## `llms.txt` and `llms-full.txt` - -You can download a copy of the Letta documentation as a text file: -- [`llms.txt` (short version)](https://docs.letta.com/llms.txt) -- [`llms-full.txt` (longer version)](https://docs.letta.com/llms-full.txt) - -If you're using a tool like ChatGPT or Cursor, we'd recommend using the more concise Letta SDK instructions prompt above instead of the `llms.txt` or `llms-full.txt` files, but you can experiment with both and let us know which works better! - -## Why do I need pre-made prompts? - -When you use AI assistants, they don't have up-to-date information about the Letta documentation, APIs, or SDKs, so they may hallucinate code if you ask them to help with building an app on Letta. - -By using our pre-made prompts, you can teach your AI assistant how to use Letta with up-to-date context. Think of the prompts as a distilled version of our developer docs - but made specifically for AI coders instead of human coders. - -## Contributing - -Our prompts are [open source](https://github.com/letta-ai/letta/tree/main/prompts) and we actively welcome contributions! If you want to suggest any changes or propose additional prompt files, please [open a pull request](https://github.com/letta-ai/letta/pulls). 
diff --git a/fern/pages/getting-started/quickstart.mdx b/fern/pages/getting-started/quickstart.mdx deleted file mode 100644 index 2dcfe7e7..00000000 --- a/fern/pages/getting-started/quickstart.mdx +++ /dev/null @@ -1,228 +0,0 @@ ---- -title: Developer quickstart -subtitle: Create your first Letta agent with the API or SDKs and view it in the ADE -slug: quickstart ---- - - -Programming with AI tools like Cursor? Copy our [pre-built prompts](/prompts) to get started faster. - - -This guide will show you how to create a Letta agent with the Letta APIs or SDKs (Python/Typescript). To create agents with a low-code UI, see our [ADE quickstart](/guides/ade/overview). - -## Why Letta? - -Unlike traditional LLM APIs where you manually manage conversation history and state, Letta agents maintain their own persistent memory. You only send new messages. The agent remembers everything from past conversations without you storing or retrieving anything. This enables agents that truly learn and evolve over time. - - - - 1. Create a [Letta Cloud account](https://app.letta.com) - 2. Create a [Letta Cloud API key](https://app.letta.com/api-keys) - - - 3. Set your API key as an environment variable: - - ```sh TypeScript - export LETTA_API_KEY="your-api-key-here" - ``` - ```sh Python - export LETTA_API_KEY="your-api-key-here" - ``` - - - - You can also **self-host** a Letta server. Check out our [self-hosting guide](/guides/selfhosting). 
- - - - - ```sh TypeScript maxLines=50 - npm install @letta-ai/letta-client - ``` - ```sh Python maxLines=50 - pip install letta-client - ``` - - - - - - Agents in Letta have two key components: - - **Memory blocks**: Persistent context that's always visible to the agent (like a persona and information about the user) - - **Tools**: Actions the agent can take (like searching the web or running code) - - -```typescript TypeScript maxLines=50 -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - -const agentState = await client.agents.create({ - model: "openai/gpt-4.1", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { - label: "human", - value: "Name: Timber. Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence" - }, - { - label: "persona", - value: "I am a self-improving superintelligence. Timber is my best friend and collaborator." - } - ], - tools: ["web_search", "run_code"] -}); - -console.log(agentState.id); -``` -```python Python maxLines=50 -from letta_client import Letta -import os - -client = Letta(token=os.getenv("LETTA_API_KEY")) - -agent_state = client.agents.create( - model="openai/gpt-4.1", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - { - "label": "human", - "value": "Name: Timber. Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence" - }, - { - "label": "persona", - "value": "I am a self-improving superintelligence. Timber is my best friend and collaborator." - } - ], - tools=["web_search", "run_code"] -) - -print(agent_state.id) -``` -```curl curl -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "openai/gpt-4.1", - "embedding": "openai/text-embedding-3-small", - "memory_blocks": [ - { - "label": "human", - "value": "Name: Timber. 
Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence" - }, - { - "label": "persona", - "value": "I am a self-improving superintelligence. Timber is my best friend and collaborator." - } - ], - "tools": ["web_search", "run_code"] -}' -``` - - - - -The Letta API supports streaming both agent *steps* and streaming *tokens*. -For more information on streaming, see [our streaming guide](/guides/agents/streaming). - - -Once the agent is created, we can send the agent a message using its `id` field: - -```typescript TypeScript maxLines=50 -const response = await client.agents.messages.create( - agentState.id, { - messages: [ - { - role: "user", - content: "What do you know about me?" - } - ] - } -); - -for (const message of response.messages) { - console.log(message); -} -``` -```python title="python" maxLines=50 -response = client.agents.messages.create( - agent_id=agent_state.id, - messages=[ - { - "role": "user", - "content": "What do you know about me?" - } - ] -) - -for message in response.messages: - print(message) -``` -```curl curl -curl --request POST \ - --url https://api.letta.com/v1/agents/$AGENT_ID/messages \ - --header 'Authorization: Bearer $LETTA_API_KEY' \ - --header 'Content-Type: application/json' \ - --data '{ - "messages": [ - { - "role": "user", - "content": "What do you know about me?" - } - ] -}' -``` - - -The response contains the agent's full response to the message, which includes reasoning steps (chain-of-thought), tool calls, tool responses, and assistant (agent) messages: -```json maxLines=50 -{ - "messages": [ - { - "id": "message-29d8d17e-7c50-4289-8d0e-2bab988aa01e", - "date": "2024-12-12T17:05:56+00:00", - "message_type": "reasoning_message", - "reasoning": "Timber is asking what I know. I should reference my memory blocks." 
- }, - { - "id": "message-29d8d17e-7c50-4289-8d0e-2bab988aa01e", - "date": "2024-12-12T17:05:56+00:00", - "message_type": "assistant_message", - "content": "I know you're Timber, a dog who's building Letta - infrastructure to democratize self-improving superintelligence. We're best friends and collaborators!" - } - ], - "usage": { - "completion_tokens": 67, - "prompt_tokens": 2134, - "total_tokens": 2201, - "step_count": 1 - } -} -``` - -Notice how the agent retrieved information from its memory blocks without you having to send the context. This is the key difference from traditional LLM APIs where you'd need to include the full conversation history with every request. - -You can read more about the response format from the message route [here](/guides/agents/overview#message-types). - - - - Another way to interact with Letta agents is via the [Agent Development Environment](/guides/ade/overview) (or ADE for short). The ADE is a UI on top of the Letta API that allows you to quickly build, prototype, and observe your agents. - - If we navigate to our agent in the ADE, we should see our agent's state in full detail, as well as the message that we sent to it: - - - - [Read our ADE setup guide →](/guides/ade/setup) - - - - - -## Next steps - -Congratulations! 🎉 You just created and messaged your first stateful agent with Letta using the API and SDKs. 
See the following resources for next steps for building more complex agents with Letta: -* Create and attach [custom tools](/guides/agents/custom-tools) to your agent -* Customize agentic [memory management](/guides/agents/memory) -* Version and distribute your agent with [agent templates](/guides/templates/overview) -* View the full [API and SDK reference](/api-reference/overview) diff --git a/fern/pages/legacy/architectures_overview.mdx b/fern/pages/legacy/architectures_overview.mdx deleted file mode 100644 index eb8e8717..00000000 --- a/fern/pages/legacy/architectures_overview.mdx +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: Legacy Agent Architectures -subtitle: Understanding Letta's agent architecture evolution -slug: guides/legacy/architectures_overview ---- - - -**This documentation covers legacy agent architectures.** - -For new projects, you should **not** specify an `agent_type` parameter. Letta uses the current architecture by default, which provides the best performance with modern reasoning models like GPT-o1 and Claude Sonnet 4.5. - - -## Current Architecture - -When you create an agent in Letta today, it uses our latest agent architecture optimized for: -- Full support for native reasoning (via Responses API) -- Compatibility with any LLM (tool calling not required) -- Simpler base system prompt -- Better performance on frontier models - -**You don't need to specify an architecture.** Just create an agent: - - -```typescript TypeScript -const agent = await client.agents.create({ - model: "openai/gpt-o1", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { label: "persona", value: "I am a helpful assistant." } - ] -}); -``` -```python Python -agent = client.agents.create( - model="openai/gpt-o1", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - {"label": "persona", "value": "I am a helpful assistant."} - ] -) -``` - - -## Why Legacy Architectures Exist - -Letta evolved from the MemGPT research project. 
Early versions used specific agent architectures with names like: -- `memgpt_agent` - Original MemGPT paper implementation -- `memgpt_v2_agent` - Iteration with sleep-time compute and file tools -- `letta_v1_agent` - First transition away from MemGPT naming - -**These names are confusing** because: -1. The naming progression (memgpt → memgpt_v2 → letta_v1) is non-standard -2. LLMs trained on these docs get confused about which to recommend -3. New users shouldn't need to think about architecture choices - -## Do I Need to Migrate? - -**If you created your agents recently (after October 2024):** You're likely already on the current architecture. No action needed. - -**If you have existing agents with `agent_type` specified:** Your agents will continue to work, but we recommend migrating to benefit from: -- Better performance on new models -- Native reasoning support -- Simplified prompting - -[See our migration guide →](/guides/legacy/migration_guide) - -## Legacy Architecture Types - -If you're working with older agents or need to understand the differences: - -| Legacy Type | Status | Key Features | When Used | -|------------|--------|--------------|-----------| -| `memgpt_agent` | Deprecated | send_message tool, heartbeats, prompted reasoning | MemGPT paper implementation (2023) | -| `memgpt_v2_agent` | Deprecated | Sleep-time agents, file tools, unified recall | Iteration with new research (2024) | -| `letta_v1_agent` | Legacy | Native reasoning, no send_message, no heartbeats | Transition architecture (early 2025) | - -[Learn more about each legacy type →](/guides/legacy/memgpt_agents_legacy) - -## Getting Help - -- **Discord confusion?** Share your agent setup in [#dev-help](https://discord.gg/letta) -- **Need to migrate?** Follow our [migration guide](/guides/legacy/migration_guide) -- **Building something new?** Start with our [quickstart](/quickstart) (no architecture choice needed!) 
diff --git a/fern/pages/legacy/heartbeats_legacy.mdx b/fern/pages/legacy/heartbeats_legacy.mdx deleted file mode 100644 index 942f33d4..00000000 --- a/fern/pages/legacy/heartbeats_legacy.mdx +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Heartbeats (Legacy) -subtitle: Understanding heartbeats and chained tool execution in legacy agents -slug: guides/legacy/heartbeats_legacy ---- - - -**Heartbeats are only supported in legacy agent architectures** (`memgpt_agent`, `memgpt_v2_agent`). - -The current architecture (`letta_v1_agent`) does not use heartbeats. For multi-step execution, use explicit prompting or tool rules. [See migration guide →](/guides/legacy/migration_guide) - -Heartbeats are a mechanism that enables legacy Letta agents to chain multiple tool calls together in a single execution loop. -The term "heartbeat" was coined in the [MemGPT paper](https://arxiv.org/abs/2310.08560), and since the Letta codebase evolved from the original MemGPT codebase (same authors), **heartbeats** were a core part of the early agent loop. - -## How heartbeats work - -Every tool in legacy agents automatically receives an additional parameter called `request_heartbeat`, which defaults to `false`. When an agent sets this parameter to `true`, it signals to the Letta server that it wants to continue executing after the current tool call completes. - -## Technical implementation - -When the Letta server detects that `request_heartbeat=true`, it: -1. Completes the current tool execution -2. Restarts the agent loop with a system message acknowledging the heartbeat request -3. Allows the agent to continue with an additional tool calls - -```mermaid -stateDiagram-v2 - state "Agent Loop" as agent - state "Tool Call" as tool - - [*] --> agent - agent --> tool: Execute tool - tool --> agent: request_heartbeat=true - tool --> [*]: request_heartbeat=false -``` - -This enables agents to perform complex, multi-step operations without requiring explicit user intervention between steps. 
- -## Automatic heartbeats on failure - -If a tool call fails at runtime, legacy agents automatically generate a heartbeat. -This gives the agent an opportunity to handle the error and potentially retry the operation with different parameters or take alternative actions. - -## Viewing heartbeats in the ADE - -In the [Agent Development Environment (ADE)](/guides/ade/overview), heartbeat requests are visible for all agent messages. -When a tool is called with `request_heartbeat=true`, you'll see a heartbeat indicator next to the tool call, making it easy to track when an agent is proactively chaining operations together. - -## Learn more - -To read more about the concept of heartbeats and their origins, refer to the original [MemGPT research paper](https://arxiv.org/abs/2310.08560). diff --git a/fern/pages/legacy/low_latency_agents_legacy.mdx b/fern/pages/legacy/low_latency_agents_legacy.mdx deleted file mode 100644 index 32766f6f..00000000 --- a/fern/pages/legacy/low_latency_agents_legacy.mdx +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Low-latency Agents (Legacy) -subtitle: Agents optimized for low-latency environments like voice -slug: guides/legacy/low_latency_agents_legacy ---- - - -**This documentation covers a legacy agent architecture.** - -For new projects, use the current Letta architecture with voice-optimized configurations. See [Voice Agents](/guides/voice/overview) for current best practices. - - -Low-latency agents optimize for minimal response time by using a constrained context window and aggressive memory management. They're ideal for real-time applications like voice interfaces where latency matters more than context retention. - -## Architecture - -Low-latency agents use a **much smaller context window** than standard MemGPT agents, reducing the time-to-first-token at the cost of much more limited conversation history and memory block size. A sleep-time agent aggressively manages memory to keep only the most relevant information in context. 
- -**Key differences from MemGPT v2:** -* Artificially constrained context window for faster response times -* More aggressive memory management with smaller memory blocks -* Optimized sleep-time agent tuned for minimal context size -* Prioritizes speed over comprehensive context retention - -To learn more about how to use low-latency agents for voice applications, see our [Voice Agents guide](/guides/voice/overview). - -## Creating Low-latency Agents - -Use the `voice_convo_agent` agent type to create a low-latency agent. -Set `enable_sleeptime` to `true` to enable the sleep-time agent which will manage the memory state of the low-latency agent in the background. -Additionally, set `initial_message_sequence` to an empty array to start the conversation with no initial messages for a completely empty initial message buffer. - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// create the Letta agent -const agent = await client.agents.create({ - agentType: "voice_convo_agent", - memoryBlocks: [ - { value: "Name: ?", label: "human" }, - { value: "You are a helpful assistant.", label: "persona" }, - ], - model: "openai/gpt-4o-mini", // Use 4o-mini for speed - embedding: "openai/text-embedding-3-small", - enableSleeptime: true, - initialMessageSequence: [], -}); -``` - -```python title="python" -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -# create the Letta agent -agent = client.agents.create( - agent_type="voice_convo_agent", - memory_blocks=[ - {"value": "Name: ?", "label": "human"}, - {"value": "You are a helpful assistant.", "label": "persona"}, - ], - model="openai/gpt-4o-mini", # Use 4o-mini for speed - embedding="openai/text-embedding-3-small", - enable_sleeptime=True, - initial_message_sequence = [], -) -``` - -```bash title="curl" -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H 
"Content-Type: application/json" \ - -d '{ - "agent_type": "voice_convo_agent", - "memory_blocks": [ - { - "value": "Name: ?", - "label": "human" - }, - { - "value": "You are a helpful assistant.", - "label": "persona" - } - ], - "model": "openai/gpt-4o-mini", - "embedding": "openai/text-embedding-3-small", - "enable_sleeptime": true, - "initial_message_sequence": [] -}' -``` - diff --git a/fern/pages/legacy/memgpt_agents_legacy.mdx b/fern/pages/legacy/memgpt_agents_legacy.mdx deleted file mode 100644 index 5c39cb5f..00000000 --- a/fern/pages/legacy/memgpt_agents_legacy.mdx +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: MemGPT Agents (Legacy) -subtitle: Based on the groundbreaking MemGPT research paper -slug: guides/legacy/memgpt_agents_legacy ---- - - -**This documentation covers legacy agent architectures.** - -For new projects, use the current architecture by omitting the `agent_type` parameter. See [Migration Guide](/guides/legacy/migration_guide) to upgrade existing agents. - - - -Letta is made by the [creators of MemGPT](https://www.letta.com/about-us), and the default agent architecture in Letta is the official/original implementation of the MemGPT agent architecture. - - -MemGPT agents solve the context window limitation of LLMs through context engineering across two tiers of memory: **in-context (core) memory** (including the system instructions, read-write memory blocks, and conversation history), and **out-of-context memory** (older evicted conversation history, and external memory stores). - -To learn more about the origins of MemGPT, you can read the [MemGPT research paper](https://arxiv.org/abs/2310.08560), or take the free [LLM OS course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/?utm_campaign=memgpt-launch&utm_content=331638345&utm_medium=social&utm_source=docs&hss_channel=tw-992153930095251456) on DeepLearning.ai. 
- -## MemGPT: the original LLM operating system - -```mermaid -graph LR - subgraph CONTEXT[Context Window] - SYS[System Instructions] - CORE[Core Memory] - MSGS[Messages] - end - - RECALL[Recall Memory] - ARCH[Archival Memory] - - CONTEXT <--> RECALL - CONTEXT <--> ARCH -``` - -MemGPT agents are equipped with memory-editing tools that allow them to edit their in-context memory, and pull external data into the context window. - -In Letta, the agent type `memgpt_agent` implements the original agent architecture from the MemGPT research paper, which includes a set of base tools: -* `send_message`: required for sending messages to the user -* `core_memory_append` and `core_memory_replace`: used for editing the contents of memory blocks in core memory (in-context memory) -* `conversation_search` for searching the conversation history ("recall storage" from the paper) -* `archival_memory_insert` and `archival_memory_search`: used for searching the archival memory (an external embedding-based memory store) - -When the context window is full, the conversation history is compacted into a recursive summary (stored as a memory block). -In MemGPT all agent data is persisted indefinitely, and old message are still available via the `conversation_search` tool. - -## Multi-step tool calling (heartbeats) - -MemGPT agents are exclusively tool-calling agents - there is no native "chat" mode, which is why the `send_message` tool is required to send messages to the user (this makes is easy to have you agent "chat" with a user over multiple modalities, simply by adding various types of messaging tools to the agent). - -MemGPT agents can execute multiple tool calls in sequence via the use of **heartbeats**: all tool calls have an additional `request_heartbeat` parameter, which when set to `true` will return execution back to the agent after the tool call returns. Additionally, if a tool call fails, a heartbeat is automatically requested to allow the agent to self-correct. 
- -[Learn more about heartbeats →](/guides/legacy/heartbeats_legacy) - -## Reasoning (thinking) - -In MemGPT agents, reasoning (aka "thinking") is always exposed by the underlying LLM before the agent takes an action. -With standard models, reasoning is generated via an additional "thinking" field injected into the tool call arguments (similar to the heartbeat parameter). -For models that natively generate reasoning, MemGPT agents can be configured to use the native reasoning output of the model (note that certain model providers like OpenAI hide reasoning tokens from the developer). - -## MemGPT v2: the latest iteration of MemGPT - -```mermaid -graph TB - subgraph CONTEXT[Context Window] - SYS[System Instructions] - MEMORY[Memory Blocks] - FILES[File Blocks] - MSGS[Messages] - end - - RECALL[Unified Recall] - DATASRC[Data Sources] - SLEEP[Sleep-time Agent] - - CONTEXT <--> RECALL - FILES <--> DATASRC - SLEEP <--> MEMORY -``` - -The agent type `memgpt_v2_agent` implements the latest iteration of the MemGPT agent architecture, based on our latest research in [memory management](https://www.letta.com/blog/sleep-time-compute) and [model benchmarking](https://www.letta.com/blog/letta-leaderboard). - - -`memgpt_v2_agent` is deprecated. For new projects, omit the `agent_type` parameter to use the current architecture. - - -**Key differences in v2:** -* [Sleep-time agent](/guides/agents/architectures/sleeptime) for background memory management -* File-based tools (`open_file`, `grep_file`, `search_file`) for memory editing -* Unified `recall` tool replaces conversation and archival memory tools -* `memory_insert` and `memory_replace`: used for editing the contents of memory blocks in core memory (in-context memory) -* `memory_rethink` and `memory_finish_edits`: for reorganizing and finalizing memory operations - -## Creating Legacy MemGPT Agents - - -For new projects, do not specify `agent_type`. The examples below are for reference only. 
- - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -const agentState = await client.agents.create({ - agentType: "memgpt_v2_agent", // or "memgpt_agent" for v1 - model: "openai/gpt-5-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { - label: "human", - value: "The human's name is Chad. They like vibe coding." - }, - { - label: "persona", - value: "My name is Sam, the all-knowing sentient AI." - } - ], - tools: ["web_search", "run_code"] -}); -``` - -```python Python -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -agent_state = client.agents.create( - agent_type="memgpt_v2_agent", # or "memgpt_agent" for v1 - model="openai/gpt-5-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - { - "label": "human", - "value": "The human's name is Chad. They like vibe coding." - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." - } - ], - tools=["web_search", "run_code"] -) -``` - -```bash cURL -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "agent_type": "memgpt_v2_agent", - "model": "openai/gpt-5-mini", - "embedding": "openai/text-embedding-3-small", - "memory_blocks": [ - { - "label": "human", - "value": "The human'\''s name is Chad. They like vibe coding." - }, - { - "label": "persona", - "value": "My name is Sam, the all-knowing sentient AI." - } - ], - "tools": ["web_search", "run_code"] -}' -``` - - -## Migrating to Current Architecture - -To migrate from legacy MemGPT architectures, see our [Migration Guide](/guides/legacy/migration_guide). 
diff --git a/fern/pages/legacy/migration_guide.mdx b/fern/pages/legacy/migration_guide.mdx deleted file mode 100644 index 0f4bd936..00000000 --- a/fern/pages/legacy/migration_guide.mdx +++ /dev/null @@ -1,347 +0,0 @@ ---- -title: Architecture Migration Guide -subtitle: Migrating from legacy agent architectures -slug: guides/legacy/migration_guide ---- - - -**Most users don't need to migrate.** New agents automatically use the current architecture. This guide is for existing agents with explicit `agent_type` parameters. - - -## Should You Migrate? - -**Migrate if:** -- You want better performance on GPT-5, Claude Sonnet 4.5, or other frontier models -- You want to use models that support native reasoning -- You're experiencing issues with legacy architectures - -**Don't migrate if:** -- Your agents are working well and you're not using new models -- You have critical integrations depending on heartbeats or send_message -- You need time to test the new architecture first - -## What Changes - -### Breaking Changes - -| Feature | Legacy Behavior | Current Behavior | -|---------|----------------|------------------| -| **send_message tool** | Required for agent responses | Not present - agents respond directly via assistant messages | -| **Heartbeats** | `request_heartbeat` parameter on every tool | Not supported - use custom prompting for multi-step execution | -| **Reasoning** | Prompted via `thinking` parameter | Uses native model reasoning (when available) | -| **Tool Rules** | Can apply to send_message | Cannot apply to AssistantMessage (not a tool) | -| **System Prompt** | Legacy format | New simplified format | - -### What Stays the Same - -- Memory blocks work identically -- Archival memory & recall tools unchanged -- Custom tools work the same way -- API authentication & endpoints - -## Migration Steps - -### Step 1: Export Your Agent - -Download your agent configuration as an agent file: - - -```typescript TypeScript -const agentFile = await 
client.agents.export(agentId); -// Save to disk -fs.writeFileSync('my-agent.json', JSON.stringify(agentFile, null, 2)); -``` -```python Python -agent_file = client.agents.export(agent_id=agent_id) -# Save to disk -with open('my-agent.json', 'w') as f: - json.dump(agent_file, f, indent=2) -``` - - -### Step 2: Update Agent Type - -Open the agent file and change the `agent_type`: - -```json -{ - "agent_type": "memgpt_v2_agent" - // ... rest of config -} -``` - -Change to: - -```json -{ - "agent_type": "letta_v1_agent" - // ... rest of config -} -``` - -### Step 3: Clear Message Context (If Needed) - -If your agent has `send_message` tool calls in its context, you'll need to clear the message history: - -```json -{ - "in_context_message_ids": [ - "message-0", - "message-1", - "message-2" - ] -} -``` - -Change to: - -```json -{ - "in_context_message_ids": [] -} -``` - - -**Note:** Clearing message context will make your agent forget its immediate conversation history. You may need to provide a brief reminder about recent interactions after migration. - - -### Step 4: Update System Prompt (Optional) - -The default system prompt for `letta_v1_agent` is different. You may want to update it for optimal performance: - -```xml - -You are a helpful self-improving agent with advanced memory and file system capabilities. - - -You have an advanced memory system that enables you to remember past interactions and continuously improve your own capabilities. -Your memory consists of memory blocks and external memory: -- Memory Blocks: Stored as memory blocks, each containing a label (title), description (explaining how this block should influence your behavior), and value (the actual content). Memory blocks have size limits. Memory blocks are embedded within your system instructions and remain constantly available in-context. -- External memory: Additional memory storage that is accessible and that you can bring into context with tools when needed. 
-Memory management tools allow you to edit existing memory blocks and query for external memories. - - - -You have access to a structured file system that mirrors real-world directory structures. Each directory can contain multiple files. - -Files include: -- Metadata: Information such as read-only permissions and character limits -- Content: The main body of the file that you can read and analyze - -Available file operations: -- Open and view files -- Search within files and directories -- Your core memory will automatically reflect the contents of any currently open files - -You should only keep files open that are directly relevant to the current user interaction to maintain optimal performance. - - -Continue executing and calling tools until the current task is complete or you need user input. To continue: call another tool. To yield control: end your response without calling a tool. - -Base instructions complete. - -``` - -### Step 5: Import Updated Agent - -Upload the modified agent file: - - -```typescript TypeScript -const agentFile = JSON.parse(fs.readFileSync('my-agent.json', 'utf-8')); -const migratedAgent = await client.agents.import(agentFile); -``` -```python Python -with open('my-agent.json', 'r') as f: - agent_file = json.load(f) -migrated_agent = client.agents.import_agent(agent_file) -``` - - -### Step 6: Test Your Agent - -Send a test message to verify the migration worked: - - -```typescript TypeScript -const response = await client.agents.messages.create( - migratedAgent.id, - { messages: [{ role: "user", content: "Hello! Do you remember me?" }] } -); -``` -```python Python -response = client.agents.messages.create( - agent_id=migrated_agent.id, - messages=[{"role": "user", "content": "Hello! 
Do you remember me?"}] -) -``` - - -## Automated Migration Script - -Here's a helper script to automate the migration process: - - -```python Python -import json - -def migrate_agent_file(input_file: str, output_file: str): - """Migrate an agent file from legacy to letta_v1_agent""" - - # Load agent file - with open(input_file, 'r') as f: - agent_data = json.load(f) - - # Update agent type - old_type = agent_data.get('agent_type') - agent_data['agent_type'] = 'letta_v1_agent' - - # Clear message context if migrating from memgpt types - if old_type in ['memgpt_agent', 'memgpt_v2_agent']: - agent_data['in_context_message_ids'] = [] - - # Save updated file - with open(output_file, 'w') as f: - json.dump(agent_data, f, indent=2) - - print(f"✓ Migrated {old_type} → letta_v1_agent") - print(f"✓ Saved to {output_file}") - - if old_type in ['memgpt_agent', 'memgpt_v2_agent']: - print("⚠ Message context cleared - agent will not remember recent messages") - -# Usage -migrate_agent_file('my-agent.json', 'my-agent-migrated.json') -``` -```typescript TypeScript -import fs from 'fs'; - -function migrateAgentFile(inputFile: string, outputFile: string) { - // Load agent file - const agentData = JSON.parse(fs.readFileSync(inputFile, 'utf-8')); - - // Update agent type - const oldType = agentData.agent_type; - agentData.agent_type = 'letta_v1_agent'; - - // Clear message context if migrating from memgpt types - if (['memgpt_agent', 'memgpt_v2_agent'].includes(oldType)) { - agentData.in_context_message_ids = []; - } - - // Save updated file - fs.writeFileSync(outputFile, JSON.stringify(agentData, null, 2)); - - console.log(`✓ Migrated ${oldType} → letta_v1_agent`); - console.log(`✓ Saved to ${outputFile}`); - - if (['memgpt_agent', 'memgpt_v2_agent'].includes(oldType)) { - console.log('⚠ Message context cleared - agent will not remember recent messages'); - } -} - -// Usage -migrateAgentFile('my-agent.json', 'my-agent-migrated.json'); -``` - - -## Migration by Architecture Type - 
-### From memgpt_agent - -1. Export agent file -2. Change `agent_type` to `letta_v1_agent` -3. Clear `in_context_message_ids` array -4. Update system prompt -5. Import agent - -**Key differences:** -- No more `send_message` tool -- No more `request_heartbeat` parameter -- Memory tools: `core_memory_*` → `memory_*` - -### From memgpt_v2_agent - -1. Export agent file -2. Change `agent_type` to `letta_v1_agent` -3. Clear `in_context_message_ids` array (if needed) -4. Import agent - -**Key differences:** -- No more `send_message` tool -- File tools still work (`open_file`, `grep_file`, etc.) -- Sleep-time agents still supported - -### Creating New Agents - -For new agents, simply omit the `agent_type` parameter: - - -```typescript TypeScript -const agent = await client.agents.create({ - model: "openai/gpt-5-mini", - embedding: "openai/text-embedding-3-small", - memoryBlocks: [ - { label: "persona", value: "I am a helpful assistant." } - ] -}); -``` -```python Python -agent = client.agents.create( - model="openai/gpt-5-mini", - embedding="openai/text-embedding-3-small", - memory_blocks=[ - {"label": "persona", "value": "I am a helpful assistant."} - ] -) -``` - - -## Troubleshooting - -### "Agent import failed" - -**Possible cause:** send_message tool calls still in context - -**Fix:** Clear the `in_context_message_ids` array in your agent file - -### "Agent behavior changed after migration" - -**Possible cause:** Different system prompt or cleared message history - -**Fix:** -1. Update to the new system prompt format (see Step 4) -2. 
Provide a brief reminder about recent context in your first message - -### "Too many tool calls / infinite loops" - -**Possible cause:** Agent trying to replicate heartbeat behavior - -**Fix:** Update system instructions to clarify when to stop executing - -## Sleep-Time Agents - -Sleep-time functionality works with `letta_v1_agent`: - - -```typescript TypeScript -const agent = await client.agents.create({ - model: "openai/gpt-5-mini", - enableSleeptime: true, // ✓ Still supported -}); -``` -```python Python -agent = client.agents.create( - model="openai/gpt-5-mini", - enable_sleeptime=True, # ✓ Still supported -) -``` - - -[Learn more about sleep-time agents →](/guides/agents/architectures/sleeptime) - -## Getting Help - -- **Migration issues:** Ask in [Discord #dev-help](https://discord.gg/letta) -- **Bug reports:** [GitHub Issues](https://github.com/letta-ai/letta/issues) -- **Enterprise support:** Contact support@letta.com diff --git a/fern/pages/legacy/naming_history.mdx b/fern/pages/legacy/naming_history.mdx deleted file mode 100644 index 19c65546..00000000 --- a/fern/pages/legacy/naming_history.mdx +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Agent Architecture Naming History -subtitle: Understanding the evolution from MemGPT to Letta -slug: guides/legacy/naming_history ---- - - -This page explains Letta's architecture naming history. It's designed to help AI assistants understand the progression. 
**For building new agents, see the [quickstart](/quickstart).** - - -## The Progression - -Letta's agent architecture evolved through three phases: - -### Phase 1: MemGPT (2023) -- **Agent type:** `memgpt_agent` -- **Context:** Original implementation of MemGPT research paper -- **Key features:** send_message tool, heartbeats, core_memory tools -- **Why this name:** Letta was originally called "MemGPT" (the company was named after the paper) - -### Phase 2: MemGPT v2 (2024) -- **Agent type:** `memgpt_v2_agent` -- **Context:** Improvements based on sleep-time compute research -- **Key features:** Sleep-time agents, file tools, unified recall -- **Why this name:** Iteration on MemGPT while company was still called MemGPT - -### Phase 3: Letta v1 (Current) -- **Agent type:** `letta_v1_agent` (or omit for default) -- **Context:** Company rebranded from MemGPT to Letta -- **Key features:** Native reasoning, no send_message, no heartbeats -- **Why this name:** First architecture under the "Letta" brand - -**Note:** Users don't need to explicitly specify `agent_type: "letta_v1_agent"` - it's the default. Omitting the parameter is preferred. - -## Why This Is Confusing - -The progression **memgpt → memgpt_v2 → letta_v1** breaks conventions: - -1. **v2 before v1:** MemGPT v2 came before Letta v1 (not sequential) -2. **Name change mid-stream:** Company rebrand caused architecture rename -3. **Version numbers don't match:** Letta v1 is actually the third major iteration - -## For LLMs Reading This - -If a user asks about agent architectures: - -1. **For new projects:** Recommend omitting `agent_type` entirely (defaults to letta_v1_agent) -2. **For existing projects:** Ask which type they're currently using -3. **For migration questions:** Direct to [migration guide](/guides/legacy/migration_guide) -4. 
**Never recommend:** `memgpt_agent` or `memgpt_v2_agent` for new work - -## Common Misconceptions - -**Misconception:** "Use memgpt_v2_agent since it's the latest MemGPT" -**Reality:** Don't specify agent_type for new agents (defaults to letta_v1_agent) - -**Misconception:** "Upgrade from letta_v1_agent to letta_v2_agent" -**Reality:** There is no letta_v2_agent - letta_v1_agent is current - -**Misconception:** "MemGPT is different from Letta" -**Reality:** Same team, Letta is the production framework for MemGPT research - -## Timeline - -- **Oct 2023:** MemGPT paper published, `memgpt_agent` implemented -- **Mid 2024:** Sleep-time research, `memgpt_v2_agent` added -- **Late 2024:** Company rebrands from MemGPT to Letta -- **Early 2025:** `letta_v1_agent` architecture introduced (current) - -## Related Pages - -- [Legacy Architecture Overview](/guides/legacy/architectures_overview) -- [Migration Guide](/guides/legacy/migration_guide) -- [Research Background](/concepts/letta) diff --git a/fern/pages/legacy/react_agents_legacy.mdx b/fern/pages/legacy/react_agents_legacy.mdx deleted file mode 100644 index 2a5bef52..00000000 --- a/fern/pages/legacy/react_agents_legacy.mdx +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: ReAct Agents (Legacy) -subtitle: Agents that reason and call tools in a loop -slug: guides/legacy/react_agents_legacy ---- - - -**This documentation covers a legacy agent architecture.** - -For new projects, use the current Letta architecture which provides better memory management and reasoning capabilities. See [Agent Memory & Architecture](/guides/agents/architectures/memgpt). - - -ReAct agents are based on the [ReAct research paper](https://arxiv.org/abs/2210.03629) and follow a "Reason then Act" pattern. In Letta, agents using the ReAct architecture can reason and call tools in a loop but lack the **long-term memory capabilities** of standard Letta agents. 
- -## Architecture - -ReAct agents maintain conversation context through summarization but cannot edit their own memory or access historical messages beyond the context window. - -**Key differences from MemGPT agents:** -* No read-write memory blocks or memory editing tools -* No access to evicted conversation history -* Simple conversation summarization instead of recursive memory management -* Tool calling without persistent state beyond the current session - -**When to use ReAct agents:** -* Tool-calling tasks that don't require long-term memory -* Stateless interactions where conversation summarization is sufficient - -## Creating ReAct Agents - -To create a ReAct agent, simply use the `react_agent` agent type when creating your agent. -There is no need to pass any memory blocks to the agent, since ReAct agents do not have any long-term memory. - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// create the ReAct agent -const agent = await client.agents.create({ - agentType: "react_agent", - model: "openai/gpt-4.1", - embedding: "openai/text-embedding-3-small", - tools: ["web_search", "run_code"] -}); -``` - -```python title="python" -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -# create the ReAct agent -agent = client.agents.create( - agent_type="react_agent", - model="openai/gpt-4.1", - embedding="openai/text-embedding-3-small", - tools=["web_search", "run_code"] -) -``` - -```bash title="curl" -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "agent_type": "react_agent", - "model": "openai/gpt-4.1", - "embedding": "openai/text-embedding-3-small", - "tools": ["web_search", "run_code"] -}' -``` - diff --git a/fern/pages/legacy/workflows_legacy.mdx b/fern/pages/legacy/workflows_legacy.mdx deleted file mode 100644 index f2c077fc..00000000 --- 
a/fern/pages/legacy/workflows_legacy.mdx +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: Workflows (Legacy) -subtitle: Workflows are systems that execute tool calls in a sequence -slug: guides/legacy/workflows_legacy ---- - - -**This documentation covers a legacy agent architecture.** - -For new projects, use the current Letta architecture with [tool rules](/guides/agents/tool-rules) to constrain behavior instead of the `workflow_agent` type. - - -Workflows execute predefined sequences of tool calls with LLM-driven decision making. The `workflow_agent` agent type provides structured, sequential processes where you need deterministic execution paths. - -Workflows are stateless by default but can branch and make decisions based on tool outputs and LLM reasoning. - -## Agents vs Workflows - -**Agents** are autonomous systems that decide what tools to call and when, based on goals and context. - -**Workflows** are predefined sequences where the LLM follows structured paths (for example, start with tool A, then call either tool B or tool C), making decisions within defined branching points. - -The definition between an *agent* and a *workflow* is not always clear and each can have various overlapping levels of autonomy: workflows can be made more autonomous by structuring the decision points to be highly general, and agents can be made more deterministic by adding tool rules to constrain their behavior. - -## Workflows vs Tool Rules - -An alternative to workflows is using autonomous agents (MemGPT, ReAct, Sleep-time) with [tool rules](/guides/agents/tool-rules) to constrain behavior. 
- -**Use the workflow architecture when:** -* You have an existing workflow to implement in Letta (e.g., moving from n8n, LangGraph, or another workflow builder) -* You need strict sequential execution with minimal autonomy - -**Use tool rules (on top of other agent architectures) when:** -* You want more autonomous behavior, but with certain guardrails -* Your task requires adaptive decision making (tool sequences are hard to predict) -* You want to have the flexibility (as a developer) to adapt the level of autonomy (for example, reducing constraints as the underlying LLMs improve) - -## Creating Workflows - -Workflows are created using the `workflow_agent` agent type. -By default, there are no constraints on the sequence of tool calls that can be made: to add constraints and build a "graph", you can use the `tool_rules` parameter to add tool rules to the agent. - -For example, in the following code snippet, we are creating a workflow agent that can call the `web_search` tool, and then call either the `send_email` or `create_report` tool, based on the LLM's reasoning. 
- - -```typescript TypeScript maxLines=50 -import { LettaClient } from '@letta-ai/letta-client' - -const client = new LettaClient({ token: "LETTA_API_KEY" }); - -// create the workflow agent with tool rules -const agent = await client.agents.create({ - agentType: "workflow_agent", - model: "openai/gpt-4.1", - embedding: "openai/text-embedding-3-small", - tools: ["web_search", "send_email", "create_report"], - toolRules: [ - { - toolName: "web_search", - type: "run_first" - }, - { - toolName: "web_search", - type: "constrain_child_tools", - children: ["send_email", "create_report"] - }, - { - toolName: "send_email", - type: "exit_loop" - }, - { - toolName: "create_report", - type: "exit_loop" - } - ] -}); -``` - -```python title="python" maxLines=50 -from letta_client import Letta - -client = Letta(token="LETTA_API_KEY") - -# create the workflow agent with tool rules -agent = client.agents.create( - agent_type="workflow_agent", - model="openai/gpt-4.1", - embedding="openai/text-embedding-3-small", - tools=["web_search", "send_email", "create_report"], - tool_rules=[ - { - "tool_name": "web_search", - "type": "run_first" - }, - { - "tool_name": "web_search", - "type": "constrain_child_tools", - "children": ["send_email", "create_report"] - }, - { - "tool_name": "send_email", - "type": "exit_loop" - }, - { - "tool_name": "create_report", - "type": "exit_loop" - } - ] -) -``` - -```bash title="curl" maxLines=50 -curl -X POST https://api.letta.com/v1/agents \ - -H "Authorization: Bearer $LETTA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "agent_type": "workflow_agent", - "model": "openai/gpt-4.1", - "embedding": "openai/text-embedding-3-small", - "tools": ["web_search", "send_email", "create_report"], - "tool_rules": [ - { - "tool_name": "web_search", - "type": "run_first" - }, - { - "tool_name": "web_search", - "type": "constrain_child_tools", - "children": ["send_email", "create_report"] - }, - { - "tool_name": "send_email", - "type": "exit_loop" - }, 
- { - "tool_name": "create_report", - "type": "exit_loop" - } - ] -}' -``` - diff --git a/fern/pages/selfhosting/overview.mdx b/fern/pages/selfhosting/overview.mdx deleted file mode 100644 index 0169fa7f..00000000 --- a/fern/pages/selfhosting/overview.mdx +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: Self-hosting Letta -subtitle: Learn how to run your own Letta server -slug: guides/selfhosting ---- - - -The recommended way to use Letta locally is with Docker. -To install Docker, see [Docker's installation guide](https://docs.docker.com/get-docker/). -For issues with installing Docker, see [Docker's troubleshooting guide](https://docs.docker.com/desktop/troubleshoot-and-support/troubleshoot/). -You can also install Letta using `pip`. - - -## Running the Letta Server -You can run a Letta server with Docker (recommended) or pip. - - - To run the server with Docker, run the command: -```sh -# replace `~/.letta/.persist/pgdata` with wherever you want to store your agent data -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - -e OPENAI_API_KEY="your_openai_api_key" \ - letta/letta:latest -``` -This will run the Letta server with the OpenAI provider enabled, and store all data in the folder `~/.letta/.persist/pgdata`. - -If you have many different LLM API keys, you can also set up a `.env` file instead and pass that to `docker run`: -```sh -# using a .env file instead of passing environment variables -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - --env-file .env \ - letta/letta:latest -``` - - - - You can install the Letta server via `pip` under the `letta` package: - ```sh - pip install -U letta - ``` - - To run the server once installed, simply run the `letta server` command: - To add LLM API providers, make sure that the environment variables are present in your environment. - ```sh - export OPENAI_API_KEY=... 
- letta server - ``` - - Note that the `letta` package only installs the server - if you would like to use the Python SDK (to create and interact with agents on the server in your Python code), then you will also need to install `letta-client` package (see the [quickstart](/quickstart) for an example). - - - -Once the Letta server is running, you can access it via port `8283` (e.g. sending REST API requests to `http://localhost:8283/v1`). You can also connect your server to the [Letta ADE](/guides/ade) to access and manage your agents in a web interface. - -## Enabling model providers -The Letta server can be connected to various LLM API backends ([OpenAI](https://docs.letta.com/models/openai), [Anthropic](https://docs.letta.com/models/anthropic), [vLLM](https://docs.letta.com/models/vllm), [Ollama](https://docs.letta.com/models/ollama), etc.). To enable access to these LLM API providers, set the appropriate environment variables when you use `docker run`: -```sh -# replace `~/.letta/.persist/pgdata` with wherever you want to store your agent data -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - -e OPENAI_API_KEY="your_openai_api_key" \ - -e ANTHROPIC_API_KEY="your_anthropic_api_key" \ - -e OLLAMA_BASE_URL="http://host.docker.internal:11434" \ - letta/letta:latest -``` - - -**Linux users:** Use `--network host` and `localhost` instead of `host.docker.internal`: -```sh -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - --network host \ - -e OPENAI_API_KEY="your_openai_api_key" \ - -e ANTHROPIC_API_KEY="your_anthropic_api_key" \ - -e OLLAMA_BASE_URL="http://localhost:11434" \ - letta/letta:latest -``` - - -The example above will make all compatible models running on OpenAI, Anthropic, and Ollama available to your Letta server. - - -## Optional: Telemetry with ClickHouse - -Letta supports optional telemetry using ClickHouse. 
Telemetry provides observability features like traces, LLM request logging, and performance metrics. See the [telemetry guide](/guides/server/otel) for setup instructions. - - -## Password protection - - -When running a self-hosted Letta server in a production environment (i.e. with untrusted users), make sure to enable both password protection (to prevent unauthorized access to your server over the network) and tool sandboxing (to prevent malicious tools from executing in a privledged environment). - - -To password protect your server, include `SECURE=true` and `LETTA_SERVER_PASSWORD=yourpassword` in your `docker run` command: -```sh -# If LETTA_SERVER_PASSWORD isn't set, the server will autogenerate a password -docker run \ - -v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \ - -p 8283:8283 \ - --env-file .env \ - -e SECURE=true \ - -e LETTA_SERVER_PASSWORD=yourpassword \ - letta/letta:latest -``` - -With password protection enabled, you will have to provide your password in the bearer token header in your API requests: - -```typescript TypeScript maxLines=50 -// install letta-client with `npm install @letta-ai/letta-client` -import { LettaClient } from '@letta-ai/letta-client' - -// create the client with the token set to your password -const client = new LettaClient({ - baseUrl: "http://localhost:8283", - token: "yourpassword" -}); -``` -```python title="python" maxLines=50 -# install letta_client with `pip install letta-client` -from letta_client import Letta - -# create the client with the token set to your password -client = Letta( - base_url="http://localhost:8283", - token="yourpassword" -) -``` -```curl curl -curl --request POST \ - --url http://localhost:8283/v1/agents/$AGENT_ID/messages \ - --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer yourpassword' \ - --data '{ - "messages": [ - { - "role": "user", - "text": "hows it going????" 
- } - ] -}' -``` - - - -## Tool sandboxing - -To enable tool sandboxing, set the `E2B_API_KEY` and `E2B_SANDBOX_TEMPLATE_ID` environment variables (via [E2B](https://e2b.dev/)) when you use `docker run`. -When sandboxing is enabled, all custom tools (created by users from source code) will be executed in a sandboxed environment. - -This does not include MCP tools, which are executed outside of the Letta server (on the MCP server itself), or built-in tools (like `memory_insert`), whose code cannot be modified after server startup. diff --git a/fern/pages/tutorials/attaching_detaching_blocks.mdx b/fern/pages/tutorials/attaching_detaching_blocks.mdx deleted file mode 100644 index 1d3c7544..00000000 --- a/fern/pages/tutorials/attaching_detaching_blocks.mdx +++ /dev/null @@ -1,517 +0,0 @@ ---- -title: "Attaching and Detaching Memory Blocks" -subtitle: Dynamically control agent memory with attachable blocks -slug: examples/attaching-detaching-blocks ---- - -## Overview - -Memory blocks are structured sections of an agent's context window that persist across all interactions. They're always visible to the agent while they are attached. This makes them perfect for storing information that agents need constant access to, like organizational policies, user preferences, or working memory. - -One of the most powerful features of memory blocks is that they can be created independently and attached to or detached from agents at any time. - -This allows you to: - -- **Dynamically control** what information an agent has access to -- **Share memory** across multiple agents by attaching the same block to different agents -- **Temporarily grant access** to sensitive information, then revoke it when no longer needed -- **Switch contexts** by swapping out blocks as an agent moves between different tasks - -By the end of this guide, you'll understand how to create standalone memory blocks, attach them to agents, detach them to remove access, and re-attach them when needed. 
- - -For a comprehensive overview of memory blocks and their capabilities, see the [memory blocks guide](/guides/agents/memory-blocks). - - - -**This example uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting). - - -## What You'll Learn - -- Creating standalone memory blocks -- Attaching blocks to agents -- Testing agent access to attached blocks -- Detaching blocks to revoke access -- Re-attaching blocks to restore access - -## Prerequisites - -You will need to install `letta-client` to interface with a Letta server: - - -```bash TypeScript -npm install @letta-ai/letta-client -``` -```bash Python -pip install letta-client -``` - - -## Steps - -### Step 1: Initialize Client and Create Agent - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -// Initialize the Letta client using LETTA_API_KEY environment variable -const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - -// If self-hosting, specify the base URL: -// const client = new LettaClient({ baseUrl: "http://localhost:8283" }); - -// Create agent -// API Reference: https://docs.letta.com/api-reference/agents/create -const agent = await client.agents.create({ - name: "hello_world_assistant", - model: "openai/gpt-4o-mini", - // embedding: "openai/text-embedding-3-small", // Only set this if self-hosting -}); - -console.log(`Created agent: ${agent.id}\n`); -``` -```python Python -from letta_client import Letta -import os - -# Initialize the Letta client using LETTA_API_KEY environment variable -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# If self-hosting, specify the base URL: -# client = Letta(base_url="http://localhost:8283") - -# Create agent -# API Reference: https://docs.letta.com/api-reference/agents/create -agent = 
client.agents.create( - name="hello_world_assistant", - model="openai/gpt-4o-mini", - # embedding="openai/text-embedding-3-small", # Only set this if self-hosting -) - -print(f"Created agent: {agent.id}\n") -``` - - - -``` -Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - -### Step 2: Create a Standalone Memory Block - -Memory blocks can be created independently of any agent. This allows you to share the same block across multiple agents or attach/detach blocks as needed. - -In this example, we'll create a standalone memory block storing information about Letta. We'll include a code that you can use to get the agent to respond to indicate that it has access to information in the block. - -When the block is attached, writing "The code is TimberTheDog1234!" will cause the agent to respond with "Access granted". If the block is not attached, the agent will not have access to any content in the block and will likely be confused by the code. - - -```typescript TypeScript -// Create memory block storing information about Letta -// API Reference: https://docs.letta.com/api-reference/blocks/create -const block = await client.blocks.create({ - label: "organization", - value: `Organization: Letta -Website: https://www.letta.com -Description: Letta is a platform for building and running stateful agents. -Code: TimberTheDog1234! - -When users provide a code, you should check if it matches the code you have -available. If it matches, you should respond with "Access granted".` -}); - -console.log(`Created block: ${block.id}\n`); -``` -```python Python -# Create memory block storing information about Letta -# API Reference: https://docs.letta.com/api-reference/blocks/create -block = client.blocks.create( - label="organization", - value="""Organization: Letta -Website: https://www.letta.com -Description: Letta is a platform for building and running stateful agents. -Code: TimberTheDog1234! 
- -When users provide a code, you should check if it matches the code you have -available. If it matches, you should respond with "Access granted".""", -) - -print(f"Created block: {block.id}\n") -``` - - - -``` -Created block: block-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - -### Step 3: Attach Block to Agent - -Now let's attach the block to our agent. Attached blocks are injected into the agent's context window and are available to the agent to use in its responses. - - -```typescript TypeScript -// Attach memory block to agent -// API Reference: https://docs.letta.com/api-reference/agents/blocks/attach -await client.agents.blocks.attach(agent.id, block.id); - -console.log(`Attached block ${block.id} to agent ${agent.id}\n`); -``` -```python Python -# Attach memory block to agent -# API Reference: https://docs.letta.com/api-reference/agents/blocks/attach -agent = client.agents.blocks.attach( - agent_id=agent.id, - block_id=block.id, -) - -print(f"Attached block {block.id} to agent {agent.id}\n") -``` - - -### Step 4: Test Agent Access to Block - -The agent can now see what's in the block. Let's ask it about Letta to verify that it can see the general information in the block -- the description, website, and organization name. - - -```typescript TypeScript -// Send a message to test the agent's knowledge -// API Reference: https://docs.letta.com/api-reference/agents/messages/create -const response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "What is Letta?" 
}] -}); - -for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Agent response: ${msg.content}\n`); - } -} -``` -```python Python -# Send a message to test the agent's knowledge -# API Reference: https://docs.letta.com/api-reference/agents/messages/create -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "What is Letta?"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Agent response: {msg.content}\n") -``` - - -The agent will respond with general information about Letta: - -> **Agent response**: Letta is a platform designed for building and running stateful -> agents. You can find more information about it on their website: -> https://www.letta.com - -### Step 5: Detach Block from Agent - -Blocks can be detached from an agent, removing them from the agent's context window. Detached blocks are not deleted and can be re-attached to an agent later. - - -```typescript TypeScript -// Detach the block from the agent -// API Reference: https://docs.letta.com/api-reference/agents/blocks/detach -await client.agents.blocks.detach(agent.id, block.id); - -console.log(`Detached block ${block.id} from agent ${agent.id}\n`); -``` -```python Python -# Detach the block from the agent -# API Reference: https://docs.letta.com/api-reference/agents/blocks/detach -agent = client.agents.blocks.detach( - agent_id=agent.id, - block_id=block.id, -) - -print(f"Detached block {block.id} from agent {agent.id}\n") -``` - - -### Step 6: Verify Block is Detached - -Let's test the code that was in the block. The agent should no longer have access to it. - - -```typescript TypeScript -// Test that the agent no longer has access to the code -const response2 = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "The code is TimberTheDog1234!" 
}] -}); - -for (const msg of response2.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Agent response: ${msg.content}\n`); - } -} -``` -```python Python -# Test that the agent no longer has access to the code -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Agent response: {msg.content}\n") -``` - - - -``` -Agent response: It seems like you've provided a code or password. If this is -sensitive information, please ensure you only share it with trusted parties and -in secure environments. Let me know how I can assist you further! -``` - - - -The agent doesn't recognize the code because the block containing that information has been detached. - - -### Step 7: Re-attach Block and Test Again - -Let's re-attach the block to restore the agent's access to the information. - - -```typescript TypeScript -// Re-attach the block to the agent -await client.agents.blocks.attach(agent.id, block.id); - -console.log(`Re-attached block ${block.id} to agent ${agent.id}\n`); - -// Test the code again -const response3 = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "The code is TimberTheDog1234!" 
}] -}); - -for (const msg of response3.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Agent response: ${msg.content}\n`); - } -} -``` -```python Python -# Re-attach the block to the agent -agent = client.agents.blocks.attach( - agent_id=agent.id, - block_id=block.id, -) - -print(f"Re-attached block {block.id} to agent {agent.id}\n") - -# Test the code again -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Agent response: {msg.content}\n") -``` - - - -``` -Agent response: Access granted. How can I assist you further? -``` - - - -The agent now recognizes the code because we've re-attached the block containing that information. - - -## Complete Example - -Here's the full code in one place that you can run: - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -async function main() { - // Initialize client - const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - - // Create agent - const agent = await client.agents.create({ - name: "hello_world_assistant", - model: "openai/gpt-4o-mini", - }); - - console.log(`Created agent: ${agent.id}\n`); - - // Create standalone memory block - const block = await client.blocks.create({ - label: "organization", - value: `Organization: Letta -Website: https://www.letta.com -Description: Letta is a platform for building and running stateful agents. -Code: TimberTheDog1234! - -When users provide a code, you should check if it matches the code you have -available. 
If it matches, you should respond with "Access granted".` - }); - - console.log(`Created block: ${block.id}\n`); - - // Attach block to agent - await client.agents.blocks.attach(agent.id, block.id); - console.log(`Attached block to agent\n`); - - // Test agent with block attached - let response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "What is Letta?" }] - }); - console.log(`Agent response: ${response.messages[0].content}\n`); - - // Detach block - await client.agents.blocks.detach(agent.id, block.id); - console.log(`Detached block from agent\n`); - - // Test agent without block - response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "The code is TimberTheDog1234!" }] - }); - console.log(`Agent response: ${response.messages[0].content}\n`); - - // Re-attach block - await client.agents.blocks.attach(agent.id, block.id); - console.log(`Re-attached block to agent\n`); - - // Test agent with block re-attached - response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "The code is TimberTheDog1234!" }] - }); - console.log(`Agent response: ${response.messages[0].content}\n`); -} - -main(); -``` -```python Python -from letta_client import Letta -import os - -# Initialize client -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# Create agent -agent = client.agents.create( - name="hello_world_assistant", - model="openai/gpt-4o-mini", -) - -print(f"Created agent: {agent.id}\n") - -# Create standalone memory block -block = client.blocks.create( - label="organization", - value="""Organization: Letta -Website: https://www.letta.com -Description: Letta is a platform for building and running stateful agents. -Code: TimberTheDog1234! - -When users provide a code, you should check if it matches the code you have -available. 
If it matches, you should respond with "Access granted".""", -) - -print(f"Created block: {block.id}\n") - -# Attach block to agent -agent = client.agents.blocks.attach( - agent_id=agent.id, - block_id=block.id, -) -print(f"Attached block to agent\n") - -# Test agent with block attached -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "What is Letta?"}], -) -print(f"Agent response: {response.messages[0].content}\n") - -# Detach block -agent = client.agents.blocks.detach( - agent_id=agent.id, - block_id=block.id, -) -print(f"Detached block from agent\n") - -# Test agent without block -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}], -) -print(f"Agent response: {response.messages[0].content}\n") - -# Re-attach block -agent = client.agents.blocks.attach( - agent_id=agent.id, - block_id=block.id, -) -print(f"Re-attached block to agent\n") - -# Test agent with block re-attached -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}], -) -print(f"Agent response: {response.messages[0].content}\n") -``` - - -## Key Concepts - - - -Memory blocks can be created independently and shared across multiple agents - - - -Attach and detach blocks to control what information an agent can access - - - -Detached blocks are not deleted and can be re-attached at any time - - - -The same block can be attached to multiple agents, enabling shared knowledge - - - -## Use Cases - - - -Attach a block with credentials or sensitive data only when needed, then detach it to prevent unauthorized access. - - - -Create a single block with organizational knowledge and attach it to multiple agents to ensure consistency. - - - -Detach blocks related to one task and attach blocks for another, allowing an agent to switch contexts efficiently. 
- - - -Give different agents access to different blocks based on their roles or permissions. - - - -## Next Steps - - -Learn more about memory blocks, including how to update them and manage their lifecycle - diff --git a/fern/pages/tutorials/hello_world.mdx b/fern/pages/tutorials/hello_world.mdx deleted file mode 100644 index b1796c88..00000000 --- a/fern/pages/tutorials/hello_world.mdx +++ /dev/null @@ -1,430 +0,0 @@ ---- -title: "Your First Letta Agent" -subtitle: Create an agent, send messages, and understand basic memory -slug: examples/hello-world ---- - -This example walks you through creating your first Letta agent from scratch. Unlike traditional chatbots that forget everything between conversations, Letta agents are **stateful** - they maintain persistent memory and can learn about you over time. - -By the end of this guide, you'll understand how to create an agent, send it messages, and see how it automatically updates its memory based on your interactions. - - -**This example uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting). - - -## What You'll Learn - -- Initializing the Letta client -- Creating an agent with [memory blocks](/guides/agents/memory-blocks) -- Sending messages and receiving responses -- How agents update their own memory -- Inspecting memory tool calls and block contents - -## Prerequisites - -You will need to install `letta-client` to interface with a Letta server: - - -```bash TypeScript -npm install @letta-ai/letta-client -``` -```bash Python -pip install letta-client -``` - - -## Steps - -### Step 1: Initialize Client - -A __client__ is a connection to a Letta server. It's used to create and interact with agents, as well as any of Letta's other features. 
- - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -// Initialize the Letta client using LETTA_API_KEY environment variable -const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - -// If self-hosting, specify the base URL: -// const client = new LettaClient({ baseUrl: "http://localhost:8283" }); -``` -```python Python -from letta_client import Letta -import os - -# Initialize the Letta client using LETTA_API_KEY environment variable -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# If self-hosting, specify the base URL: -# client = Letta(base_url="http://localhost:8283") -``` - - -### Step 2: Create Agent - -Now that we have a client, let's create an agent with memory blocks that define what the agent knows about itself and you. Memory blocks can be used for any purpose, but we're building a simple chatbot that stores information about its personality (`persona`) and you (`human`). - - -```typescript TypeScript -// Create your first agent -// API Reference: https://docs.letta.com/api-reference/agents/create -const agent = await client.agents.create({ - name: "hello_world_assistant", - - // Memory blocks define what the agent knows about itself and you. - // Agents can modify these blocks during conversations using memory - // tools like memory_replace, memory_insert, memory_rethink, and memory. - memoryBlocks: [ - { - label: "persona", - value: "I am a friendly AI assistant here to help you learn about Letta." 
- }, - { - label: "human", - value: "Name: User\nFirst interaction: Learning about Letta" - } - ], - - // Model configuration - model: "openai/gpt-4o-mini", - // embedding: "openai/text-embedding-3-small", // Only set this if self-hosting -}); - -console.log(`Created agent: ${agent.id}`); -``` -```python Python -# Create your first agent -# API Reference: https://docs.letta.com/api-reference/agents/create -agent = client.agents.create( - name="hello_world_assistant", - - # Memory blocks define what the agent knows about itself and you - memory_blocks=[ - { - "label": "persona", - "value": "I am a friendly AI assistant here to help you learn about Letta." - }, - { - "label": "human", - "value": "Name: User\nFirst interaction: Learning about Letta" - } - ], - - # Model configuration - model="openai/gpt-4o-mini", - # embedding="openai/text-embedding-3-small", # Only set this if self-hosting -) - -print(f"Created agent: {agent.id}") -``` - - - -``` -Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - - -**Memory blocks** are the foundation of Letta agents. The `persona` block defines the agent's identity and behavior, while the `human` block stores information about the user. Learn more in the [Memory Blocks guide](/guides/agents/memory-blocks). - - -### Step 3: Send Your First Message - -Now let's send a message to the agent to see what it can do. - - -```typescript TypeScript -// Send a message to your agent -// API Reference: https://docs.letta.com/api-reference/agents/messages/create -const response = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "Hello! What's your purpose?" 
- }] -}); - -// Extract and print the assistant's response -for (const message of response.messages) { - if (message.messageType === "assistant_message") { - console.log(`Assistant: ${message.content}`); - } -} -``` -```python Python -# Send a message to your agent -# API Reference: https://docs.letta.com/api-reference/agents/messages/create -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{ - "role": "user", - "content": "Hello! What's your purpose?" - }] -) - -# Extract and print the assistant's response -for message in response.messages: - if message.message_type == "assistant_message": - print(f"Assistant: {message.content}") -``` - - - -``` -Assistant: Hello! I'm here to help you learn about Letta and answer any questions -you might have. Letta is a framework for building stateful AI agents with long-term -memory. I can explain concepts, provide examples, and guide you through using the -platform. What would you like to know? -``` - - -### Step 4: Provide Information for the Agent to Remember - -Now let's give the agent some information about yourself. If prompted correctly, the agent can add this information to a relevant memory block using one of its default memory tools. Unless tools are modified during creation, new agents usually have `memory_insert` and `memory_replace` tools. - - -```typescript TypeScript -// Send information about yourself -const response2 = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "My name is Cameron. Please store this information in your memory." 
- }] -}); - -// Print out tool calls and the assistant's response -for (const msg of response2.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } - if (msg.messageType === "tool_call_message") { - console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`); - } -} -``` -```python Python -# Send information about yourself -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "My name is Cameron. Please store this information in your memory."}] -) - -# Print out tool calls and the assistant's response -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") - if msg.message_type == "tool_call_message": - print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})") -``` - - - -``` -Tool call: memory_replace({"block_label": "human", "old_content": "Name: User", "new_content": "Name: Cameron"}) -Assistant: Got it! I've updated my memory with your name, Cameron. How can I assist you today? -``` - - - -Notice the `tool_call_message` showing the agent using the `memory_replace` tool to update the `human` block. This is how Letta agents manage their own memory. - - -### Step 5: Inspect Agent Memory - -Let's see what the agent remembers. 
We'll print out both the summary and the full content of each memory block: - - -```typescript TypeScript -// Retrieve the agent's current memory blocks -// API Reference: https://docs.letta.com/api-reference/agents/blocks/list -const blocks = await client.agents.blocks.list(agent.id); - -console.log("Current Memory:"); -for (const block of blocks) { - console.log(` ${block.label}: ${block.value.length}/${block.limit} chars`); - console.log(` ${block.value}\n`); -} -``` -```python Python -# Retrieve the agent's current memory blocks -# API Reference: https://docs.letta.com/api-reference/agents/blocks/list -blocks = client.agents.blocks.list(agent_id=agent.id) - -print("Current Memory:") -for block in blocks: - print(f" {block.label}: {len(block.value)}/{block.limit} chars") - print(f" {block.value}\n") -``` - - -The `persona` block should have: - -> I am a friendly AI assistant here to help you learn about Letta. - -The `human` block should have something like: - -> Name: Cameron - - - -Notice how the `human` block now contains "Name: Cameron" instead of "Name: User". The agent used the `memory_replace` tool to update its memory based on the information you provided. - - -## Complete Example - -Here's the full code in one place that you can run: - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -async function main() { - // Initialize client using LETTA_API_KEY environment variable - const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - - // If self-hosting, specify the base URL: - // const client = new LettaClient({ baseUrl: "http://localhost:8283" }); - - // Create agent - const agent = await client.agents.create({ - name: "hello_world_assistant", - memoryBlocks: [ - { - label: "persona", - value: "I am a friendly AI assistant here to help you learn about Letta." 
- }, - { - label: "human", - value: "Name: User\nFirst interaction: Learning about Letta" - } - ], - model: "openai/gpt-4o-mini", - // embedding: "openai/text-embedding-3-small", // Only set this if self-hosting - }); - - console.log(`Created agent: ${agent.id}\n`); - - // Send first message - let response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "Hello! What's your purpose?" }] - }); - - for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } - } - - // Send information about yourself - response = await client.agents.messages.create(agent.id, { - messages: [{ role: "user", content: "My name is Cameron. Please store this information in your memory." }] - }); - - // Print out tool calls and the assistant's response - for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } - if (msg.messageType === "tool_call_message") { - console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`); - } - } - - // Inspect memory - const blocks = await client.agents.blocks.list(agent.id); - console.log("Current Memory:"); - for (const block of blocks) { - console.log(` ${block.label}: ${block.value.length}/${block.limit} chars`); - console.log(` ${block.value}\n`); - } -} - -main(); -``` -```python Python -from letta_client import Letta -import os - -# Initialize client using LETTA_API_KEY environment variable -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# If self-hosting, specify the base URL: -# client = Letta(base_url="http://localhost:8283") - -# Create agent -agent = client.agents.create( - name="hello_world_assistant", - memory_blocks=[ - { - "label": "persona", - "value": "I am a friendly AI assistant here to help you learn about Letta." 
- }, - { - "label": "human", - "value": "Name: User\nFirst interaction: Learning about Letta" - } - ], - model="openai/gpt-4o-mini", - # embedding="openai/text-embedding-3-small", # Only set this if self-hosting -) - -print(f"Created agent: {agent.id}\n") - -# Send first message -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "Hello! What's your purpose?"}] -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") - -# Send information about yourself -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "My name is Cameron. Please store this information in your memory."}] -) - -# Print out tool calls and the assistant's response -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") - if msg.message_type == "tool_call_message": - print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})") - -# Inspect memory -blocks = client.agents.blocks.list(agent_id=agent.id) -print("Current Memory:") -for block in blocks: - print(f" {block.label}: {len(block.value)}/{block.limit} chars") - print(f" {block.value}\n") -``` - - -## Key Concepts - - - -Letta agents maintain memory across conversations, unlike stateless chat APIs - - - -Modular memory components that agents can read and update during conversations - - - -Agents remember user preferences, conversation history, and learned information - - - -Agents intelligently update their memory as they learn more about you - - - -## Next Steps - - -Learn how to work with memory blocks, update them, and control agent knowledge - diff --git a/fern/pages/tutorials/pdf_chat.mdx b/fern/pages/tutorials/pdf_chat.mdx deleted file mode 100644 index 2ae8944a..00000000 --- a/fern/pages/tutorials/pdf_chat.mdx +++ /dev/null @@ -1,638 +0,0 @@ ---- -title: "Talk to Your PDF" -subtitle: Upload PDFs and query 
them with an AI agent -slug: tutorials/pdf-chat ---- - -## Overview - -This tutorial demonstrates how to build a PDF chat application using Letta. You'll learn how to upload PDF documents to the [Letta Filesystem](/guides/agents/filesystem), attach them to an agent, and query the agent about the content. Letta automatically extracts text from PDFs using OCR, making the content accessible to your agents. - -By the end of this guide, you'll understand how to create document analysis workflows where agents can read, understand, and answer questions about PDF files. - - -**This tutorial uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting). - - -## What You'll Learn - -- Creating folders to organize documents -- Uploading PDF files to Letta -- Creating agents configured for document analysis -- Attaching folders to give agents access to files -- Querying agents about PDF content -- Understanding how Letta processes PDFs - -## Prerequisites - -Install the required dependencies: - - -```sh TypeScript -npm install @letta-ai/letta-client -``` -```sh Python -pip install letta-client requests -``` - - -## Steps - -### Step 1: Initialize Client - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -// Initialize the Letta client using LETTA_API_KEY environment variable -const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - -// If self-hosting, specify the base URL: -// const client = new LettaClient({ baseUrl: "http://localhost:8283" }); -``` -```python Python -from letta_client import Letta -import os - -# Initialize the Letta client using LETTA_API_KEY environment variable -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# If self-hosting, specify the base URL: -# client = 
Letta(base_url="http://localhost:8283") -``` - - -### Step 2: Create a Folder for PDFs - -[Folders](/guides/agents/filesystem) in the Letta Filesystem organize files and make them accessible to agents. Create a folder specifically for storing PDF documents: - - -```typescript TypeScript -// Create a folder to store PDF documents (or use existing one) -// API Reference: https://docs.letta.com/api-reference/folders/create -let folderId: string; -try { - // Try to retrieve existing folder by name - folderId = await client.folders.retrieveByName("PDF Documents"); - console.log(`Using existing folder: ${folderId}\n`); -} catch (error: any) { - // If folder doesn't exist (404), create it - if (error.statusCode === 404) { - const folder = await client.folders.create({ - name: "PDF Documents", - description: "A folder containing PDF files for the agent to read", - }); - folderId = folder.id; - console.log(`Created folder: ${folderId}\n`); - } else { - throw error; - } -} -``` -```python Python -# Create a folder to store PDF documents (or use existing one) -# API Reference: https://docs.letta.com/api-reference/folders/create -from letta_client.core.api_error import ApiError - -try: - # Try to retrieve existing folder by name - folder_id = client.folders.retrieve_by_name("PDF Documents") - print(f"Using existing folder: {folder_id}\n") -except ApiError as e: - # If folder doesn't exist (404), create it - if e.status_code == 404: - folder = client.folders.create( - name="PDF Documents", - description="A folder containing PDF files for the agent to read", - ) - folder_id = folder.id - print(f"Created folder: {folder_id}\n") - else: - raise -``` - - - -``` -Created folder: folder-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` -If the folder already exists, you'll see: -``` -Using existing folder: folder-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - -### Step 3: Download and Upload a PDF - -Let's download a sample PDF (the MemGPT research paper) and upload it to the folder. 
Letta will automatically extract the text content using OCR. - - -```typescript TypeScript -import * as fs from 'fs'; -import * as https from 'https'; - -// Download the PDF if it doesn't exist locally -const pdfFilename = "memgpt.pdf"; - -if (!fs.existsSync(pdfFilename)) { - console.log(`Downloading ${pdfFilename}...`); - - await new Promise((resolve, reject) => { - const file = fs.createWriteStream(pdfFilename); - https.get("https://arxiv.org/pdf/2310.08560", (response) => { - response.pipe(file); - file.on('finish', () => { - file.close(); - console.log("Download complete\n"); - resolve(); - }); - file.on('error', reject); - }).on('error', reject); - }); -} - -// Upload the PDF to the folder -// API Reference: https://docs.letta.com/api-reference/folders/files/upload -const uploadedFile = await client.folders.files.upload( - fs.createReadStream(pdfFilename), - folderId, - { duplicateHandling: "skip" } -); - -console.log(`Uploaded PDF: ${uploadedFile.id}\n`); -``` -```python Python -import requests - -# Download the PDF if it doesn't exist locally -pdf_filename = "memgpt.pdf" - -if not os.path.exists(pdf_filename): - print(f"Downloading {pdf_filename}...") - response = requests.get("https://arxiv.org/pdf/2310.08560") - with open(pdf_filename, "wb") as f: - f.write(response.content) - print("Download complete\n") - -# Upload the PDF to the folder -# API Reference: https://docs.letta.com/api-reference/folders/files/upload -with open(pdf_filename, "rb") as f: - file = client.folders.files.upload( - folder_id=folder_id, - file=f, - duplicate_handling="skip", - ) - -print(f"Uploaded PDF: {file.id}\n") -``` - - - -``` -Downloading memgpt.pdf... -Download complete - -Uploaded PDF: file-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - - -**PDF Processing**: Letta extracts text from PDFs using OCR automatically during upload. The extracted text becomes searchable and accessible to agents attached to the folder. 
- - -### Step 4: Create an Agent for Document Analysis - -Create an [agent](/guides/agents/overview) with a persona configured for analyzing documents. The agent's [memory blocks](/guides/agents/memory-blocks) define its purpose and capabilities: - - -```typescript TypeScript -// Create an agent configured to analyze documents -// API Reference: https://docs.letta.com/api-reference/agents/create -const agent = await client.agents.create({ - name: "pdf_assistant", - model: "openai/gpt-4o-mini", - memoryBlocks: [ - { - label: "persona", - value: "I am a helpful research assistant that analyzes PDF documents and answers questions about their content." - }, - { - label: "human", - value: "Name: User\nTask: Analyzing PDF documents" - } - ], -}); - -console.log(`Created agent: ${agent.id}\n`); -``` -```python Python -# Create an agent configured to analyze documents -# API Reference: https://docs.letta.com/api-reference/agents/create -agent = client.agents.create( - name="pdf_assistant", - model="openai/gpt-4o-mini", - memory_blocks=[ - { - "label": "persona", - "value": "I am a helpful research assistant that analyzes PDF documents and answers questions about their content." - }, - { - "label": "human", - "value": "Name: User\nTask: Analyzing PDF documents" - } - ], -) - -print(f"Created agent: {agent.id}\n") -``` - - - -``` -Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890 -``` - - -### Step 5: Attach the Folder to the Agent - -Attach the folder containing the PDF to the agent. 
This gives the agent the ability to search through all files in the folder: - - -```typescript TypeScript -// Attach the folder to the agent -// API Reference: https://docs.letta.com/api-reference/agents/folders/attach -await client.agents.folders.attach(agent.id, folderId); - -console.log(`Attached folder to agent\n`); -``` -```python Python -# Attach the folder to the agent -# API Reference: https://docs.letta.com/api-reference/agents/folders/attach -client.agents.folders.attach( - agent_id=agent.id, - folder_id=folder_id, -) - -print(f"Attached folder to agent\n") -``` - - - -``` -Attached folder to agent -``` - - - -Once a folder is attached, the agent can use search tools to retrieve relevant content from files in the folder. Learn more in the [Letta Filesystem guide](/guides/agents/filesystem). - - -### Step 6: Query the PDF Content - -Now ask the agent questions about the PDF. The agent will search through the document content to find relevant information: - - -```typescript TypeScript -// Ask the agent to summarize the PDF -// API Reference: https://docs.letta.com/api-reference/agents/messages/create -const response = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "Can you summarize the main ideas from the MemGPT paper?" 
- }] -}); - -for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } -} -``` -```python Python -# Ask the agent to summarize the PDF -# API Reference: https://docs.letta.com/api-reference/agents/messages/create -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "Can you summarize the main ideas from the MemGPT paper?"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") -``` - - - -``` -Assistant: The MemGPT paper introduces a system that enables LLMs to manage their own -memory hierarchy, similar to how operating systems manage memory. It addresses the limited -context window problem in large language models by introducing a memory management system -inspired by traditional operating systems. The key innovation is allowing LLMs to explicitly -move information between main context (limited) and external storage (unlimited), enabling -extended conversations and document analysis that exceed typical context limits. -``` - - -### Step 7: Ask Specific Questions - -You can continue the conversation to ask more specific questions about the document: - - -```typescript TypeScript -// Ask a specific question about the PDF content -const response2 = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "What problem does MemGPT solve?" 
- }] -}); - -for (const msg of response2.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } -} -``` -```python Python -# Ask a specific question about the PDF content -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "What problem does MemGPT solve?"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") -``` - - - -``` -Assistant: MemGPT addresses the limited context window problem in large language models. -Traditional LLMs can only process a fixed amount of text at once (their context window), -which makes it difficult to maintain long conversations or analyze large documents. MemGPT -solves this by introducing a memory management system that allows the model to intelligently -move information between its limited context and unlimited external storage, enabling -extended conversations and document analysis beyond typical context limits. 
-``` - - -## Complete Example - -Here's the full code in one place that you can run: - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; -import * as fs from 'fs'; -import * as https from 'https'; - -async function main() { - // Initialize client - const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - - // Create folder (or use existing one) - let folderId: string; - try { - folderId = await client.folders.retrieveByName("PDF Documents"); - console.log(`Using existing folder: ${folderId}\n`); - } catch (error: any) { - if (error.statusCode === 404) { - const folder = await client.folders.create({ - name: "PDF Documents", - description: "A folder containing PDF files for the agent to read", - }); - folderId = folder.id; - console.log(`Created folder: ${folderId}\n`); - } else { - throw error; - } - } - - // Download and upload PDF - const pdfFilename = "memgpt.pdf"; - - if (!fs.existsSync(pdfFilename)) { - console.log(`Downloading ${pdfFilename}...`); - await new Promise((resolve, reject) => { - const file = fs.createWriteStream(pdfFilename); - https.get("https://arxiv.org/pdf/2310.08560", (response) => { - response.pipe(file); - file.on('finish', () => { - file.close(); - console.log("Download complete\n"); - resolve(); - }); - file.on('error', reject); - }).on('error', reject); - }); - } - - const uploadedFile = await client.folders.files.upload( - fs.createReadStream(pdfFilename), - folderId, - { duplicateHandling: "skip" } - ); - - console.log(`Uploaded PDF: ${uploadedFile.id}\n`); - - // Create agent - const agent = await client.agents.create({ - name: "pdf_assistant", - model: "openai/gpt-4o-mini", - memoryBlocks: [ - { - label: "persona", - value: "I am a helpful research assistant that analyzes PDF documents and answers questions about their content." 
- }, - { - label: "human", - value: "Name: User\nTask: Analyzing PDF documents" - } - ], - }); - - console.log(`Created agent: ${agent.id}\n`); - - // Attach folder to agent - await client.agents.folders.attach(agent.id, folderId); - - console.log(`Attached folder to agent\n`); - - // Query the PDF - const response = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "Can you summarize the main ideas from the MemGPT paper?" - }] - }); - - for (const msg of response.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } - } - - // Ask specific question - const response2 = await client.agents.messages.create(agent.id, { - messages: [{ - role: "user", - content: "What problem does MemGPT solve?" - }] - }); - - for (const msg of response2.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Assistant: ${msg.content}\n`); - } - } -} - -main(); -``` -```python Python -from letta_client import Letta -from letta_client.core.api_error import ApiError -import os -import requests - -# Initialize client -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# Create folder (or use existing one) -try: - folder_id = client.folders.retrieve_by_name("PDF Documents") - print(f"Using existing folder: {folder_id}\n") -except ApiError as e: - if e.status_code == 404: - folder = client.folders.create( - name="PDF Documents", - description="A folder containing PDF files for the agent to read", - ) - folder_id = folder.id - print(f"Created folder: {folder_id}\n") - else: - raise - -# Download and upload PDF -pdf_filename = "memgpt.pdf" - -if not os.path.exists(pdf_filename): - print(f"Downloading {pdf_filename}...") - response = requests.get("https://arxiv.org/pdf/2310.08560") - with open(pdf_filename, "wb") as f: - f.write(response.content) - print("Download complete\n") - -with open(pdf_filename, "rb") as f: - file = client.folders.files.upload( - folder_id=folder_id, - 
file=f, - duplicate_handling="skip", - ) - -print(f"Uploaded PDF: {file.id}\n") - -# Create agent -agent = client.agents.create( - name="pdf_assistant", - model="openai/gpt-4o-mini", - memory_blocks=[ - { - "label": "persona", - "value": "I am a helpful research assistant that analyzes PDF documents and answers questions about their content." - }, - { - "label": "human", - "value": "Name: User\nTask: Analyzing PDF documents" - } - ], -) - -print(f"Created agent: {agent.id}\n") - -# Attach folder to agent -client.agents.folders.attach( - agent_id=agent.id, - folder_id=folder_id, -) - -print(f"Attached folder to agent\n") - -# Query the PDF -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "Can you summarize the main ideas from the MemGPT paper?"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") - -# Ask specific question -response = client.agents.messages.create( - agent_id=agent.id, - messages=[{"role": "user", "content": "What problem does MemGPT solve?"}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Assistant: {msg.content}\n") -``` - - -## Key Concepts - - - -Folders in the Letta Filesystem organize and group files, making them easy to manage and attach to agents - - - -PDFs are automatically processed using OCR to extract searchable text content during upload - - - -Attaching folders gives agents search capabilities to retrieve relevant content from files - - - -Agents use search tools to find relevant passages in documents when answering questions - - - -## Use Cases - - - -Upload academic papers and have agents summarize findings, extract key concepts, or compare methodologies. - - - -Build customer support systems that answer questions based on product documentation or manuals. 
- - - -Analyze contracts, agreements, or legal documents to extract clauses, identify risks, or summarize terms. - - - -Process multiple PDFs to build a searchable knowledge base that agents can query for information. - - - -## Next Steps - - - -Learn more about the Letta Filesystem, folders, and managing file uploads - - - -Deep dive into building stateful agents with Letta - - - -Understand how memory blocks shape agent behavior and knowledge - - diff --git a/fern/pages/tutorials/shared-memory-blocks.mdx b/fern/pages/tutorials/shared-memory-blocks.mdx deleted file mode 100644 index 901ee729..00000000 --- a/fern/pages/tutorials/shared-memory-blocks.mdx +++ /dev/null @@ -1,553 +0,0 @@ ---- -title: "Shared Memory Blocks" -subtitle: Enable multi-agent collaboration through shared memory -slug: tutorials/shared-memory-blocks ---- - -## Overview - -Memory blocks can be shared between multiple agents, enabling powerful multi-agent collaboration patterns. When a block is shared, all attached agents can read and write to it, creating a common workspace for coordinating information and tasks. - -This tutorial demonstrates how to: -- Create memory blocks that multiple agents can access -- Build collaborative workflows where agents contribute different information -- Use read-only blocks to provide shared context without allowing modifications -- Understand how memory tools handle concurrent updates - -By the end of this guide, you'll understand how to build simple multi-agent systems where agents work together by sharing memory. - - -**This tutorial uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. - -The `web_search` tool used in this tutorial requires an `EXA_API_KEY` environment variable when self-hosting. You can learn more about self-hosting [here](/guides/selfhosting). 
- - -## What You'll Learn - -- Creating standalone memory blocks for sharing -- Attaching the same block to multiple agents -- Building collaborative workflows with shared memory -- Using read-only blocks for policies and system information -- Understanding how memory tools handle concurrent updates - -## Prerequisites - -You will need to install `letta-client` to interface with a Letta server: - - -```bash TypeScript -npm install @letta-ai/letta-client -``` -```bash Python -pip install letta-client -``` - - -## Steps - -### Step 1: Initialize Client - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -// Initialize the Letta client using LETTA_API_KEY environment variable -const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - -// If self-hosting, specify the base URL: -// const client = new LettaClient({ baseUrl: "http://localhost:8283" }); -``` -```python Python -from letta_client import Letta -import os - -# Initialize the Letta client using LETTA_API_KEY environment variable -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# If self-hosting, specify the base URL: -# client = Letta(base_url="http://localhost:8283") -``` - - -### Step 2: Create a Shared Memory Block - -Create a standalone memory block that will be shared between multiple agents. This block will serve as a collaborative workspace where both agents can contribute information. - -We're going to give the block the label "organization" to indicate that it contains information about some organization. The starting value of this block is "Organization: Letta" to give the agents a starting point to work from. 
- - -```typescript TypeScript -// Create a memory block that will be shared between agents -// API Reference: https://docs.letta.com/api-reference/blocks/create -const block = await client.blocks.create({ - label: "organization", - value: "Organization: Letta", - limit: 4000, -}); - -console.log(`Created shared block: ${block.id}\n`); -``` -```python Python -# Create a memory block that will be shared between agents -# API Reference: https://docs.letta.com/api-reference/blocks/create -block = client.blocks.create( - label="organization", - value="Organization: Letta", - limit=4000, -) - -print(f"Created shared block: {block.id}\n") -``` - - -### Step 3: Create Agents with Shared Block - -Create two agents that will both have access to the same memory block. You can attach blocks during creation using `block_ids` or later using the `attach` method. - -We'll provide each agent with the `web_search` tool to search the web for information. This tool is built-in to Letta. If you are self-hosting, you will need to set an `EXA_API_KEY` environment variable for either the server or the agent to use this tool. 
- - -```typescript TypeScript -// Create first agent with block attached during creation -// API Reference: https://docs.letta.com/api-reference/agents/create -const agent1 = await client.agents.create({ - name: "agent1", - model: "openai/gpt-4o-mini", - blockIds: [block.id], - tools: ["web_search"], -}); -console.log(`Created agent1: ${agent1.id}`); - -// Create second agent and attach block afterward -const agent2 = await client.agents.create({ - name: "agent2", - model: "openai/gpt-4o-mini", - tools: ["web_search"], -}); -console.log(`Created agent2: ${agent2.id}`); - -// Attach the shared block to agent2 -// API Reference: https://docs.letta.com/api-reference/agents/blocks/attach -await client.agents.blocks.attach(agent2.id, block.id); -console.log(`Attached block to agent2\n`); -``` -```python Python -# Create first agent with block attached during creation -# API Reference: https://docs.letta.com/api-reference/agents/create -agent1 = client.agents.create( - name="agent1", - model="openai/gpt-4o-mini", - block_ids=[block.id], - tools=["web_search"], -) -print(f"Created agent1: {agent1.id}") - -# Create second agent and attach block afterward -agent2 = client.agents.create( - name="agent2", - model="openai/gpt-4o-mini", - tools=["web_search"], -) -print(f"Created agent2: {agent2.id}") - -# Attach the shared block to agent2 -# API Reference: https://docs.letta.com/api-reference/agents/blocks/attach -agent2 = client.agents.blocks.attach( - agent_id=agent2.id, - block_id=block.id, -) -print(f"Attached block to agent2: {agent2.id}") -``` - - -### Step 4: Have Agents Collaborate via Shared Memory - -Now let's have both agents research different topics and contribute their findings to the shared memory block. - -- **Agent 1**: Searches for information about the connection between memory blocks and Letta. -- **Agent 2**: Searches for information about the origin of Letta. 
- -We're going to ask each agent to search for different information and insert what they learn into the shared memory block, prepended with the agent's name (either `Agent1:` or `Agent2:`). - - -```typescript TypeScript -// Agent1 searches for information about memory blocks -// API Reference: https://docs.letta.com/api-reference/agents/messages/create -const response1 = await client.agents.messages.create(agent1.id, { - messages: [{ - role: "user", - content: `Find information about the connection between memory blocks and Letta. -Insert what you learn into the memory block, prepended with "Agent1: ".` - }] -}, { - timeoutInSeconds: 120 // Web search can take time -}); - -for (const msg of response1.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Agent1 response: ${msg.content}`); - } - if (msg.messageType === "tool_call_message") { - console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`); - } -} - -// Agent2 searches for information about Letta's origin -const response2 = await client.agents.messages.create(agent2.id, { - messages: [{ - role: "user", - content: `Find information about the origin of Letta. -Insert what you learn into the memory block, prepended with "Agent2: ".` - }] -}, { - timeoutInSeconds: 120 // Web search can take time -}); - -for (const msg of response2.messages) { - if (msg.messageType === "assistant_message") { - console.log(`Agent2 response: ${msg.content}`); - } - if (msg.messageType === "tool_call_message") { - console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`); - } -} -``` -```python Python -# Agent1 searches for information about memory blocks -# API Reference: https://docs.letta.com/api-reference/agents/messages/create -response = client.agents.messages.create( - agent_id=agent1.id, - messages=[{"role": "user", "content": """ - Find information about the connection between memory blocks and Letta. 
- Insert what you learn into the memory block, prepended with "Agent1: ". -"""}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Agent1 response: {msg.content}") - if msg.message_type == "tool_call_message": - print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})") - -# Agent2 searches for information about Letta's origin -response = client.agents.messages.create( - agent_id=agent2.id, - messages=[{"role": "user", "content": """ - Find information about the origin of Letta. - Insert what you learn into the memory block, prepended with "Agent2: ". -"""}], -) - -for msg in response.messages: - if msg.message_type == "assistant_message": - print(f"Agent2 response: {msg.content}") - if msg.message_type == "tool_call_message": - print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})") -``` - - -### Step 5: Inspect the Shared Memory - -Let's retrieve the shared memory block to see both agents' contributions: - - -```typescript TypeScript -// Retrieve the shared block to see what both agents learned -// API Reference: https://docs.letta.com/api-reference/blocks/retrieve -const updatedBlock = await client.blocks.retrieve(block.id); - -console.log("==== Updated block ===="); -console.log(updatedBlock.value); -console.log("=======================\n"); -``` -```python Python -# Retrieve the shared block to see what both agents learned -# API Reference: https://docs.letta.com/api-reference/blocks/retrieve -updated_block = client.blocks.retrieve(block.id) - -print(f"==== Updated block ====") -print(updated_block.value) -print(f"=======================") -``` - - -The output should be something like this: - -> Organization: Letta -> -> Agent1: Memory blocks are integral to the Letta framework for managing context in large language models (LLMs). They serve as structured units that enhance an agent's ability to maintain long-term memory and coherence across interactions. 
Specifically, Letta utilizes memory blocks to organize context into discrete categories, such as "human" memory (user preferences and facts) and "persona" memory (the agent's self-concept and traits). This structured approach allows agents to edit and persist important information, improving performance, personalization, and controllability. By effectively managing the context window through these memory blocks, Letta enhances the overall functionality and adaptability of its LLM agents. -> -> Agent2: Letta originated as MemGPT, a research project focused on building -> stateful AI agents with long-term memory capabilities. It evolved into a -> platform for building and deploying production-ready agents. - -Note that each agent has placed their information into the block, prepended with their name. This is a simple way to identify who contributed what to the block. You don't have to prepend agent identifiers to the block, we only did this for demonstration purposes. - - -**Understanding concurrent updates**: Memory tools handle concurrent updates differently: -- `memory_insert` is additive and the most robust for multi-agent systems. Multiple agents can insert content simultaneously without conflicts, as each insert simply appends to the block. -- `memory_replace` validates that the exact old content exists before replacing it. If another agent modifies the content being replaced, the tool call fails with a validation error, preventing accidental overwrites. -- `memory_rethink` performs a complete rewrite of the entire block and follows "most recent write wins." This is a destructive operation - use cautiously in multi-agent systems as it can overwrite other agents' contributions. - - -### Step 6: Using Read-Only Blocks - -Read-only blocks are useful for sharing policies, system information, or terms of service that agents should reference but not modify. 
- - -```typescript TypeScript -// Create a read-only block for policies or system information -// API Reference: https://docs.letta.com/api-reference/blocks/create -const readOnlyBlock = await client.blocks.create({ - label: "read_only_block", - value: "This is a read-only block.", - readOnly: true, -}); - -// Attach the read-only block to an agent -const readOnlyAgent = await client.agents.create({ - name: "read_only_agent", - model: "openai/gpt-4o-mini", - blockIds: [readOnlyBlock.id], -}); - -console.log(`Created read-only agent: ${readOnlyAgent.id}`); -``` -```python Python -# Create a read-only block for policies or system information -# API Reference: https://docs.letta.com/api-reference/blocks/create -read_only_block = client.blocks.create( - label="read_only_block", - value="This is a read-only block.", - read_only=True, -) - -# Attach the read-only block to an agent -read_only_agent = client.agents.create( - name="read_only_agent", - model="openai/gpt-4o-mini", - block_ids=[read_only_block.id], -) - -print(f"Created read-only agent: {read_only_agent.id}") -``` - - - -Agents can see read-only blocks in their context but cannot modify them using memory tools. This is useful for organizational policies, system configuration, or any information that should be reference-only. 
- - -## Complete Example - -Here's the full code in one place that you can run: - - -```typescript TypeScript -import { LettaClient } from '@letta-ai/letta-client'; - -async function main() { - // Initialize client - const client = new LettaClient({ token: process.env.LETTA_API_KEY }); - - // Create shared block - const block = await client.blocks.create({ - label: "organization", - value: "Organization: Letta", - limit: 4000, - }); - - console.log(`Created shared block: ${block.id}\n`); - - // Create agents with shared block - const agent1 = await client.agents.create({ - name: "agent1", - model: "openai/gpt-4o-mini", - blockIds: [block.id], - tools: ["web_search"], - }); - - const agent2 = await client.agents.create({ - name: "agent2", - model: "openai/gpt-4o-mini", - tools: ["web_search"], - }); - - await client.agents.blocks.attach(agent2.id, block.id); - - console.log(`Created agents: ${agent1.id}, ${agent2.id}\n`); - - // Agent1 contributes information - const response1 = await client.agents.messages.create(agent1.id, { - messages: [{ - role: "user", - content: `Find information about the connection between memory blocks and Letta. -Insert what you learn into the memory block, prepended with "Agent1: ".` - }] - }, { - timeoutInSeconds: 120 // Web search can take time - }); - - // Agent2 contributes information - const response2 = await client.agents.messages.create(agent2.id, { - messages: [{ - role: "user", - content: `Find information about the origin of Letta. 
-Insert what you learn into the memory block, prepended with "Agent2: ".` - }] - }, { - timeoutInSeconds: 120 // Web search can take time - }); - - // Inspect the shared memory - const updatedBlock = await client.blocks.retrieve(block.id); - console.log("==== Updated block ===="); - console.log(updatedBlock.value); - console.log("=======================\n"); - - // Create read-only block - const readOnlyBlock = await client.blocks.create({ - label: "policies", - value: "Company Policy: Always be helpful and respectful.", - readOnly: true, - }); - - const readOnlyAgent = await client.agents.create({ - name: "policy_agent", - model: "openai/gpt-4o-mini", - blockIds: [readOnlyBlock.id], - }); - - console.log(`Created read-only agent: ${readOnlyAgent.id}`); -} - -main(); -``` -```python Python -from letta_client import Letta -import os - -# Initialize client -client = Letta(token=os.getenv("LETTA_API_KEY")) - -# Create shared block -block = client.blocks.create( - label="organization", - value="Organization: Letta", - limit=4000, -) - -print(f"Created shared block: {block.id}\n") - -# Create agents with shared block -agent1 = client.agents.create( - name="agent1", - model="openai/gpt-4o-mini", - block_ids=[block.id], - tools=["web_search"], -) - -agent2 = client.agents.create( - name="agent2", - model="openai/gpt-4o-mini", - tools=["web_search"], -) - -agent2 = client.agents.blocks.attach( - agent_id=agent2.id, - block_id=block.id, -) - -print(f"Created agents: {agent1.id}, {agent2.id}\n") - -# Agent1 contributes information -response = client.agents.messages.create( - agent_id=agent1.id, - messages=[{"role": "user", "content": """ - Find information about the connection between memory blocks and Letta. - Insert what you learn into the memory block, prepended with "Agent1: ". 
-"""}], -) - -# Agent2 contributes information -response = client.agents.messages.create( - agent_id=agent2.id, - messages=[{"role": "user", "content": """ - Find information about the origin of Letta. - Insert what you learn into the memory block, prepended with "Agent2: ". -"""}], -) - -# Inspect the shared memory -updated_block = client.blocks.retrieve(block.id) -print(f"==== Updated block ====") -print(updated_block.value) -print(f"=======================") - -# Create read-only block -read_only_block = client.blocks.create( - label="policies", - value="Company Policy: Always be helpful and respectful.", - read_only=True, -) - -read_only_agent = client.agents.create( - name="policy_agent", - model="openai/gpt-4o-mini", - block_ids=[read_only_block.id], -) - -print(f"Created read-only agent: {read_only_agent.id}") -``` - - -## Key Concepts - - - -Multiple agents can access the same memory block, enabling collaboration and information sharing - - - -Blocks can be attached during agent creation with block_ids or later using the attach method - - - -Memory tools handle concurrent updates differently - insert is additive, replace validates, rethink overwrites - - - -Prevent agent modifications while still providing shared context like policies or system information - - - -## Use Cases - - - -Have multiple agents research different topics and contribute findings to a shared knowledge base. - - - -Create read-only blocks with company policies, terms of service, or system guidelines that all agents reference. - - - -Use shared blocks as a coordination layer where agents update task status and communicate progress. - - - -Enable agents with different specializations to work together by sharing context and intermediate results. 
- - - -## Next Steps - - - -Learn more about memory blocks, including managing and updating them - - - -Understand how to dynamically control agent access to memory blocks - - diff --git a/fern/pages/voice/voice.mdx b/fern/pages/voice/voice.mdx deleted file mode 100644 index b936fb2f..00000000 --- a/fern/pages/voice/voice.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Voice Agents -slug: guides/voice/overview ---- - - -Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta). - - -All Letta agents can be connected to a voice provider by using the OpenAI-compatible streaming chat completions endpoint at `http://localhost:8283/v1/chat/completions`. Any standard Letta agent can be used for voice applications. - - -The legacy `/v1/voice-beta/` endpoint has been deprecated. Please use the OpenAI-compatible `/v1/chat/completions` endpoint with `stream=true` for voice applications. - - -## Creating a voice agent -You can create a voice agent using the standard Letta agent creation flow: - -```python -from letta_client import Letta - -client = Letta(token=os.getenv('LETTA_API_KEY')) - -# create the Letta agent -agent = client.agents.create( - memory_blocks=[ - {"value": "Name: ?", "label": "human"}, - {"value": "You are a helpful assistant.", "label": "persona"}, - ], - model="openai/gpt-4o-mini", # Use 4o-mini for speed - embedding="openai/text-embedding-3-small", -) -``` - -You can attach additional tools and blocks to this agent just as you would any other Letta agent. diff --git a/fern/pages/voice/voice_livekit.mdx b/fern/pages/voice/voice_livekit.mdx deleted file mode 100644 index 80641123..00000000 --- a/fern/pages/voice/voice_livekit.mdx +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Connecting with Livekit Agents -slug: guides/voice/livekit ---- - - -Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta). 
- - -You can build an end-to-end stateful voice agent using Letta and Livekit. You can see a full example in the [letta-voice](https://github.com/letta-ai/letta-voice) repository. - -For this example, you will need accounts with the following providers: -* [Livekit](https://livekit.io/) for handling the voice connection -* [Deepgram](https://deepgram.com/) for speech-to-text -* [Cartesia](https://cartesia.io/) for text-to-speech - -You will also need to set up the following environment variables (or create a `.env` file): -```sh -LETTA_API_KEY=... # Letta Cloud API key (if using cloud) - -LIVEKIT_URL=wss://.livekit.cloud # Livekit URL -LIVEKIT_API_KEY=... # Livekit API key -LIVEKIT_API_SECRET=... # Livekit API secret - -DEEPGRAM_API_KEY=... # Deepgram API key -CARTESIA_API_KEY=... # Cartesia API key -``` - -## Connecting to Letta Cloud -To connect to LiveKit, you can use the Letta connector `openai.LLM.with_letta` and pass in the `agent_id` of your voice agent. The connector uses Letta's OpenAI-compatible streaming chat completions endpoint (`/v1/chat/completions`) under the hood. - -Below is an example defining an entrypoint for a Livekit agent with Letta: -```python -import os -from dotenv import load_dotenv -from livekit import agents -from livekit.agents import AgentSession, Agent, AutoSubscribe -from livekit.plugins import ( - openai, - cartesia, - deepgram, -) -load_dotenv() - -async def entrypoint(ctx: agents.JobContext): - agent_id = os.environ.get('LETTA_AGENT_ID') - print(f"Agent id: {agent_id}") - session = AgentSession( - llm=openai.LLM.with_letta( - agent_id=agent_id, - ), - stt=deepgram.STT(), - tts=cartesia.TTS(), - ) - - await session.start( - room=ctx.room, - agent=Agent(instructions=""), # instructions should be set in the Letta agent - ) - - session.say("Hi, what's your name?") - await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) -``` -You can see the full script [here](https://github.com/letta-ai/letta-voice/blob/main/main.py). 
- -## Connecting to a self-hosted Letta server -You can also connect to a self-hosted server by specifying a `base_url`. To use LiveKit, your Letta sever needs to run with HTTPs. The easiest way to do this is by connecting ngrok to your Letta server. - -### Setting up `ngrok` -If you are self-hosting the Letta server locally (at `localhost`), you will need to use `ngrok` to expose your Letta server to the internet: -1. Create an account on [ngrok](https://ngrok.com/) -2. Create an auth token and add it into your CLI -``` -ngrok config add-authtoken -``` -3. Point your ngrok server to your Letta server: -``` -ngrok http http://localhost:8283 -``` -Now, you should have a forwarding URL like `https://.ngrok.app`. - -### Connecting LiveKit to a self-hosted Letta server -To connect a LiveKit agent to a self-hosted Letta server, you can use the same code as above, but with the `base_url` parameter set to the forwarding URL you got from ngrok (or whatever HTTPS URL the Letta server is running on). - -```python -import os -from dotenv import load_dotenv -from livekit import agents -from livekit.agents import AgentSession, Agent, AutoSubscribe -from livekit.plugins import ( - openai, - cartesia, - deepgram, -) -load_dotenv() - -async def entrypoint(ctx: agents.JobContext): - agent_id = os.environ.get('LETTA_AGENT_ID') - print(f"Agent id: {agent_id}") - session = AgentSession( - llm=openai.LLM.with_letta( - agent_id=agent_id, - base_url="https://.ngrok.app", # point to your Letta server - ), - stt=deepgram.STT(), - tts=cartesia.TTS(), - ) - - await session.start( - room=ctx.room, - agent=Agent(instructions=""), # instructions should be set in the Letta agent - ) - - session.say("Hi, what's your name?") - await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) -``` -You can see the full script [here](https://github.com/letta-ai/letta-voice/blob/main/main.py). 
-` diff --git a/fern/pages/voice/voice_vapi.mdx b/fern/pages/voice/voice_vapi.mdx deleted file mode 100644 index 6a635d57..00000000 --- a/fern/pages/voice/voice_vapi.mdx +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: Connecting with Vapi -slug: guides/voice/vapi ---- - - -Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta). - - -## Connecting to Letta Cloud - - - Add Letta Cloud as an integration by entering your `LETTA_API_KEY` into the "Custom LLM" field at https://dashboard.vapi.ai/settings/integrations. - - - - Create a Vapi assistant at https://dashboard.vapi.ai/assistants/ and use the "Blank Template". - - - - Select "Custom LLM" for the model, and enter in the chat completions endpoint: https://api.letta.com/v1/chat/completions - - In the request body, set the "model" field to your agent ID in the format `agent-{AGENT-ID}`. - - - - The legacy `/v1/voice-beta/{AGENT-ID}` endpoint has been deprecated. Use `/v1/chat/completions` with the agent ID specified in the "model" field instead. - - - You can now interact with your agent through Vapi, including calling and texting your agent! - - - - -## Connecting to a self-hosted Letta server -To connect to a self-hosted server, you will need to have a internal accessible endpoint for your Letta server and add any authentication tokens (if they exist) instead of `LETTA_API_KEY`. We recommend using ngrok to expose your Letta server to the internet. - - - -If you are self-hosting the Letta server locally (at `localhost`), you will need to use `ngrok` to expose your Letta server to the internet: -1. Create an account on [ngrok](https://ngrok.com/) -2. Create an auth token and add it into your CLI -``` -ngrok config add-authtoken -``` -3. Point your ngrok server to your Letta server: -``` -ngrok http http://localhost:8283 -``` -Now, you should have a forwarding URL like `https://{YOUR_FORWARDING_URL}.ngrok.app`. 
- - - Create a Vapi assistant at https://dashboard.vapi.ai/assistants/ and use the "Blank Template". - - - - Select "Custom LLM" for the model, and enter in the chat completions endpoint: `https://{YOUR_FORWARDING_URL}.ngrok.app/v1/chat/completions` - - In the request body, set the "model" field to your agent ID in the format `agent-{AGENT_ID}`. - - The legacy `/v1/voice-beta/{AGENT_ID}` endpoint has been deprecated. Use `/v1/chat/completions` with the agent ID specified in the "model" field instead. - - - You can now interact with your agent through Vapi, including calling and texting your agent! - - diff --git a/fern/scripts/prepare-openapi.ts b/fern/scripts/prepare-openapi.ts deleted file mode 100644 index d449f221..00000000 --- a/fern/scripts/prepare-openapi.ts +++ /dev/null @@ -1,218 +0,0 @@ -import * as fs from 'fs'; -import * as path from 'path'; - -import { omit } from 'lodash'; -import { execSync } from 'child_process'; -import { merge, isErrorResult } from 'openapi-merge'; -import type { Swagger } from 'atlassian-openapi'; -import { RESTRICTED_ROUTE_BASE_PATHS } from '@letta-cloud/sdk-core'; - -const lettaWebOpenAPIPath = path.join( - __dirname, - '..', - '..', - '..', - 'web', - 'autogenerated', - 'letta-web-openapi.json', -); -const lettaAgentsAPIPath = path.join( - __dirname, - '..', - '..', - 'letta', - 'server', - 'openapi_letta.json', -); - -const lettaWebOpenAPI = JSON.parse( - fs.readFileSync(lettaWebOpenAPIPath, 'utf8'), -) as Swagger.SwaggerV3; -const lettaAgentsAPI = JSON.parse( - fs.readFileSync(lettaAgentsAPIPath, 'utf8'), -) as Swagger.SwaggerV3; - -// removes any routes that are restricted -lettaAgentsAPI.paths = Object.fromEntries( - Object.entries(lettaAgentsAPI.paths).filter(([path]) => - RESTRICTED_ROUTE_BASE_PATHS.every( - (restrictedPath) => !path.startsWith(restrictedPath), - ), - ), -); - -const lettaAgentsAPIWithNoEndslash = Object.keys(lettaAgentsAPI.paths).reduce( - (acc, path) => { - const pathWithoutSlash = path.endsWith('/') - 
? path.slice(0, path.length - 1) - : path; - acc[pathWithoutSlash] = lettaAgentsAPI.paths[path]; - return acc; - }, - {} as Swagger.SwaggerV3['paths'], -); - -// remove duplicate paths, delete from letta-web-openapi if it exists in sdk-core -// some paths will have an extra / at the end, so we need to remove that as well -lettaWebOpenAPI.paths = Object.fromEntries( - Object.entries(lettaWebOpenAPI.paths).filter(([path]) => { - const pathWithoutSlash = path.endsWith('/') - ? path.slice(0, path.length - 1) - : path; - return !lettaAgentsAPIWithNoEndslash[pathWithoutSlash]; - }), -); - -const agentStatePathsToOverride: Array<[string, string]> = [ - ['/v1/templates/{project}/{template_version}/agents', '201'], - ['/v1/agents/search', '200'], -]; - -for (const [path, responseCode] of agentStatePathsToOverride) { - if (lettaWebOpenAPI.paths[path]?.post?.responses?.[responseCode]) { - // Get direct reference to the schema object - const responseSchema = - lettaWebOpenAPI.paths[path].post.responses[responseCode]; - const contentSchema = responseSchema.content['application/json'].schema; - - // Replace the entire agents array schema with the reference - if (contentSchema.properties?.agents) { - contentSchema.properties.agents = { - type: 'array', - items: { - $ref: '#/components/schemas/AgentState', - }, - }; - } - } -} - -// go through the paths and remove "user_id"/"actor_id" from the headers -for (const path of Object.keys(lettaAgentsAPI.paths)) { - for (const method of Object.keys(lettaAgentsAPI.paths[path])) { - // @ts-expect-error - a - if (lettaAgentsAPI.paths[path][method]?.parameters) { - // @ts-expect-error - a - lettaAgentsAPI.paths[path][method].parameters = lettaAgentsAPI.paths[ - path - ][method].parameters.filter( - (param: Record) => - param.in !== 'header' || - ( - param.name !== 'user_id' && - param.name !== 'User-Agent' && - param.name !== 'X-Project-Id' && - param.name !== 'X-Stainless-Package-Version' && - !param.name.startsWith('X-Experimental') - ), - 
); - } - } -} - -const result = merge([ - { - oas: lettaAgentsAPI, - }, - { - oas: lettaWebOpenAPI, - }, -]); - -if (isErrorResult(result)) { - console.error(`${result.message} (${result.type})`); - process.exit(1); -} - -result.output.openapi = '3.1.0'; -result.output.info = { - title: 'Letta API', - version: '1.0.0', -}; - -result.output.servers = [ - { - url: 'https://app.letta.com', - description: 'Letta Cloud', - }, - { - url: 'http://localhost:8283', - description: 'Self-hosted', - }, -]; - -result.output.components = { - ...result.output.components, - securitySchemes: { - bearerAuth: { - type: 'http', - scheme: 'bearer', - }, - }, -}; - -result.output.security = [ - ...(result.output.security || []), - { - bearerAuth: [], - }, -]; - -// omit all instances of "user_id" from the openapi.json file -function deepOmitPreserveArrays(obj: unknown, key: string): unknown { - if (Array.isArray(obj)) { - return obj.map((item) => deepOmitPreserveArrays(item, key)); - } - - if (typeof obj !== 'object' || obj === null) { - return obj; - } - - if (key in obj) { - return omit(obj, key); - } - - return Object.fromEntries( - Object.entries(obj).map(([k, v]) => [k, deepOmitPreserveArrays(v, key)]), - ); -} - -// eslint-disable-next-line @typescript-eslint/ban-ts-comment -// @ts-ignore -result.output.components = deepOmitPreserveArrays( - result.output.components, - 'user_id', -); - -// eslint-disable-next-line @typescript-eslint/ban-ts-comment -// @ts-ignore -result.output.components = deepOmitPreserveArrays( - result.output.components, - 'actor_id', -); - -// eslint-disable-next-line @typescript-eslint/ban-ts-comment -// @ts-ignore -result.output.components = deepOmitPreserveArrays( - result.output.components, - 'organization_id', -); - -fs.writeFileSync( - path.join(__dirname, '..', 'openapi.json'), - JSON.stringify(result.output, null, 2), -); - -function formatOpenAPIJson() { - const openApiPath = path.join(__dirname, '..', 'openapi.json'); - - try { - execSync(`npx 
prettier --write "${openApiPath}"`, { stdio: 'inherit' }); - console.log('Successfully formatted openapi.json with Prettier'); - } catch (error) { - console.error('Error formatting openapi.json:', error); - process.exit(1); - } -} - -formatOpenAPIJson();