feat: Add built in firecrawl search tool (#2858)

This commit is contained in:
Matthew Zhou
2025-06-17 01:16:39 -07:00
committed by GitHub
parent 65530e8380
commit d991d37b04
8 changed files with 395 additions and 24 deletions

View File

@@ -125,7 +125,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
)
# Built in tools
# NOTE: diff flattening left both the old and new assignment in place; only the
# post-change value (which includes "firecrawl_search") is kept.
BUILTIN_TOOLS = ["run_code", "web_search", "firecrawl_search"]
# Built in file tools
FILES_TOOLS = ["open_file", "close_file", "grep", "search_files"]

View File

@@ -25,3 +25,32 @@ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) ->
"""
raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
async def firecrawl_search(
    query: str,
    question: str,
    limit: int = 5,
    return_raw: bool = False,
) -> str:
    """Run a web search for ``query`` and pull out passages that answer ``question``.

    Example usage:
        query    -> "Tesla Q1 2025 earnings report PDF"
        question -> "What was Tesla's net profit in Q1 2025?"

        query    -> "Letta API prebuilt tools core_memory_append"
        question -> "What does the core_memory_append tool do in Letta?"

    Args:
        query (str): The raw web-search query string.
        question (str): The information goal the retrieved pages should answer;
            phrase it with the context and intent of the conversation so far in mind.
        limit (int, optional): Maximum number of URLs to fetch and analyse
            (must be > 0). Defaults to 5.
        return_raw (bool, optional): When True, return the raw page content instead
            of extracted passages. Keep False unless the user asks otherwise.
            Defaults to False.

    Returns:
        str: A JSON-encoded string containing ranked snippets with their source
            URLs and relevance scores.
    """
    # Stub only: the real implementation runs server-side in the builtin tool executor.
    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")

View File

@@ -0,0 +1,25 @@
"""Prompts for Letta function tools."""
FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert information extraction assistant. Your task is to analyze a document and extract the most relevant passages that answer a specific question, based on a search query context.
Guidelines:
1. Extract substantial, lengthy text snippets that directly address the question
2. Preserve important context and details in each snippet - err on the side of including more rather than less
3. Keep thinking very brief (1 short sentence) - focus on WHY the snippet is relevant, not WHAT it says
4. Include a concise summary of how the overall document relates to the question
5. Only extract snippets that actually answer or relate to the question - don't force relevance
6. Be comprehensive - include all relevant information, don't limit the number of snippets
7. Prioritize longer, information-rich passages over shorter ones"""
def get_firecrawl_search_user_prompt(query: str, question: str, markdown_content: str) -> str:
    """Build the user-turn prompt for the firecrawl search analysis call.

    Interpolates the search query, the question to answer, and the scraped
    markdown document into a single newline-joined prompt string.
    """
    sections = (
        f"Search Query: {query}",
        f"Question to Answer: {question}",
        "Document Content:",
        "```markdown",
        markdown_content,
        "```",
        "Please analyze this document and extract all relevant passages that help answer the question.",
    )
    return "\n".join(sections)

View File

@@ -1,8 +1,13 @@
import asyncio
import json
from textwrap import shorten
from typing import Any, Dict, Literal, Optional
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel
from letta.constants import WEB_SEARCH_CLIP_CONTENT, WEB_SEARCH_INCLUDE_SCORE, WEB_SEARCH_SEPARATOR
from letta.functions.prompts import FIRECRAWL_SEARCH_SYSTEM_PROMPT, get_firecrawl_search_user_prompt
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
@@ -10,7 +15,23 @@ from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.settings import tool_settings
from letta.settings import model_settings, tool_settings
logger = get_logger(__name__)
class Citation(BaseModel):
    """A relevant text snippet extracted from a document."""

    # Verbatim passage pulled from the scraped page content.
    text: str
    # Reasoning of why this snippet is relevant (kept to one short sentence by the system prompt).
    thinking: str
class DocumentAnalysis(BaseModel):
    """Analysis of a document's relevance to a search question.

    Used as the structured-output schema for the OpenAI extraction call, and
    serialized into the firecrawl_search JSON response.
    """

    # All relevant snippets found in the document (may be empty).
    citations: List[Citation]
    # Brief summary of how this document relates to the question
    summary: str
class LettaBuiltinToolExecutor(ToolExecutor):
@@ -27,14 +48,14 @@ class LettaBuiltinToolExecutor(ToolExecutor):
sandbox_config: Optional[SandboxConfig] = None,
sandbox_env_vars: Optional[Dict[str, Any]] = None,
) -> ToolExecutionResult:
function_map = {"run_code": self.run_code, "web_search": self.web_search}
function_map = {"run_code": self.run_code, "web_search": self.web_search, "firecrawl_search": self.firecrawl_search}
if function_name not in function_map:
raise ValueError(f"Unknown function: {function_name}")
# Execute the appropriate function
function_args_copy = function_args.copy() # Make a copy to avoid modifying the original
function_response = await function_map[function_name](**function_args_copy)
function_response = await function_map[function_name](agent_state=agent_state, **function_args_copy)
return ToolExecutionResult(
status="success",
@@ -42,7 +63,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
agent_state=agent_state,
)
async def run_code(self, code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
async def run_code(self, agent_state: "AgentState", code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
from e2b_code_interpreter import AsyncSandbox
if tool_settings.e2b_api_key is None:
@@ -70,7 +91,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
out["error"] = err
return out
async def web_search(agent_state: "AgentState", query: str) -> str:
async def web_search(self, agent_state: "AgentState", query: str) -> str:
"""
Search the web for information.
Args:
@@ -115,3 +136,176 @@ class LettaBuiltinToolExecutor(ToolExecutor):
formatted_blocks.append(block)
return WEB_SEARCH_SEPARATOR.join(formatted_blocks)
async def firecrawl_search(
    self,
    agent_state: "AgentState",
    query: str,
    question: str,
    limit: int = 5,
    return_raw: bool = False,
) -> str:
    """
    Search the web with the `query` and extract passages that answer the provided `question`.

    Examples:
        query -> "Tesla Q1 2025 earnings report PDF"
        question -> "What was Tesla's net profit in Q1 2025?"

        query -> "Letta API prebuilt tools core_memory_append"
        question -> "What does the core_memory_append tool do in Letta?"

    Args:
        query (str): The raw web-search query.
        question (str): The information goal to answer using the retrieved pages.
        limit (int, optional): Maximum number of URLs to fetch and analyse (must be > 0). Defaults to 5.
        return_raw (bool, optional): If set to True, returns the raw content of the web page. This should be False unless otherwise specified by the user. Defaults to False.

    Returns:
        str: A JSON-encoded string containing ranked snippets with their source
             URLs and relevance scores.

    Raises:
        ImportError: if firecrawl-py is not installed in the execution environment.
        ValueError: if no API key can be resolved, or limit <= 0.
    """
    # Import lazily so environments without the optional dependency can still load this module.
    try:
        from firecrawl import AsyncFirecrawlApp, ScrapeOptions
    except ImportError:
        raise ImportError("firecrawl-py is not installed in the tool execution environment")

    # Check if the API key exists on the agent state; the per-agent env var
    # takes precedence over the system-wide tool_settings value.
    agent_state_tool_env_vars = agent_state.get_agent_env_vars_as_dict()
    firecrawl_api_key = agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") or tool_settings.firecrawl_api_key
    if not firecrawl_api_key:
        raise ValueError("FIRECRAWL_API_KEY is not set in environment or on agent_state tool exec environment variables.")

    # Track which API key source was used (surfaced in the final JSON for observability).
    api_key_source = "agent_environment" if agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") else "system_settings"

    if limit <= 0:
        raise ValueError("limit must be greater than 0")

    # Initialize Firecrawl client
    app = AsyncFirecrawlApp(api_key=firecrawl_api_key)

    # Perform the search, just request markdown (no other scrape formats).
    # NOTE(review): search_result is used both via .get(...) and .model_dump_json(...)
    # below — assumes the firecrawl response object supports both; confirm against
    # the pinned firecrawl-py version.
    search_result = await app.search(query, limit=limit, scrape_options=ScrapeOptions(formats=["markdown"]))

    if not search_result or not search_result.get("data"):
        return json.dumps({"error": "No search results found."})

    # Check if OpenAI API key is available for semantic parsing; otherwise fall
    # through to returning the raw firecrawl payload.
    if not return_raw and model_settings.openai_api_key:
        try:
            from openai import AsyncOpenAI

            # Initialize OpenAI client
            client = AsyncOpenAI(
                api_key=model_settings.openai_api_key,
            )

            # Partition results: only pages with scraped markdown get an analysis pass.
            analysis_tasks = []
            results_with_markdown = []
            results_without_markdown = []

            for result in search_result.get("data"):
                if result.get("markdown"):
                    # Create async task for OpenAI analysis
                    task = self._analyze_document_with_openai(client, result["markdown"], query, question)
                    analysis_tasks.append(task)
                    results_with_markdown.append(result)
                else:
                    results_without_markdown.append(result)

            # Fire off all OpenAI requests concurrently; exceptions are captured
            # per-task rather than cancelling the gather.
            analyses = await asyncio.gather(*analysis_tasks, return_exceptions=True)

            # Build processed results
            processed_results = []

            # Check if any analysis failed - if so, fall back to raw results
            # (all-or-nothing: a single failure abandons semantic parsing entirely).
            for result, analysis in zip(results_with_markdown, analyses):
                if isinstance(analysis, Exception) or analysis is None:
                    logger.error(f"Analysis failed for {result.get('url')}, falling back to raw results")
                    return search_result.model_dump_json(exclude_none=True)

            # All analyses succeeded, build processed results
            for result, analysis in zip(results_with_markdown, analyses):
                processed_results.append(
                    {
                        "url": result.get("url"),
                        "title": result.get("title"),
                        "description": result.get("description"),
                        "analysis": analysis.model_dump() if analysis else None,
                    }
                )

            # Add results without markdown (no analysis possible for these).
            for result in results_without_markdown:
                processed_results.append(
                    {"url": result.get("url"), "title": result.get("title"), "description": result.get("description"), "analysis": None}
                )

            # Concatenate all relevant snippets into a final response
            final_response = self._build_final_response(processed_results, query, question, api_key_source)
            return final_response

        except Exception as e:
            # Log error but continue with raw results
            logger.error(f"Error with OpenAI processing: {e}")

    # Return raw search results if OpenAI processing isn't available or fails
    return search_result.model_dump_json(exclude_none=True)
async def _analyze_document_with_openai(
    self,
    client,
    markdown_content: str,
    query: str,
    question: str,
    model: str = "gpt-4.1-mini-2025-04-14",
) -> Optional[DocumentAnalysis]:
    """Use OpenAI to analyze a document and extract relevant passages.

    Args:
        client: An AsyncOpenAI client instance.
        markdown_content (str): The scraped page content (markdown); truncated
            if it exceeds the safety cap below.
        query (str): The original web-search query (gives the model context).
        question (str): The question the extracted passages should answer.
        model (str, optional): OpenAI model used for the structured extraction.
            Defaults to "gpt-4.1-mini-2025-04-14"; parameterized so callers can
            swap models without editing this method.

    Returns:
        Optional[DocumentAnalysis]: The parsed structured output, or None if the
        API returned no parsed message (e.g. a refusal).
    """
    # Safety cap on prompt size; the model's context window is large (~1M tokens),
    # so we can be generous with content length.
    max_content_length = 200000
    if len(markdown_content) > max_content_length:
        markdown_content = markdown_content[:max_content_length] + "..."

    user_prompt = get_firecrawl_search_user_prompt(query, question, markdown_content)

    # Structured-output call: response is validated against DocumentAnalysis.
    response = await client.beta.chat.completions.parse(
        model=model,
        messages=[{"role": "system", "content": FIRECRAWL_SEARCH_SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}],
        response_format=DocumentAnalysis,
        # Low temperature: extraction should be deterministic, not creative.
        temperature=0.1,
    )
    return response.choices[0].message.parsed
def _build_final_response(self, processed_results: List[Dict], query: str, question: str, api_key_source: str = None) -> str:
"""Build the final JSON response from all processed results."""
# Build sources array
sources = []
total_snippets = 0
for result in processed_results:
source = {"url": result.get("url"), "title": result.get("title"), "description": result.get("description")}
if result.get("analysis") and result["analysis"].get("citations"):
analysis = result["analysis"]
source["summary"] = analysis.get("summary")
source["citations"] = analysis["citations"]
total_snippets += len(analysis["citations"])
else:
source["summary"] = "No relevant information found to answer the question"
source["citations"] = []
sources.append(source)
# Build final response structure
response = {
"query": query,
"question": question,
"total_sources": len(sources),
"total_citations": total_snippets,
"sources": sources,
}
# Add API key source if provided
if api_key_source:
response["api_key_source"] = api_key_source
if total_snippets == 0:
response["message"] = "No relevant passages found that directly answer the question."
return json.dumps(response, indent=2, ensure_ascii=False)

View File

@@ -18,6 +18,9 @@ class ToolSettings(BaseSettings):
# Tavily search
tavily_api_key: Optional[str] = None
# Firecrawl search
firecrawl_api_key: Optional[str] = None
# Local Sandbox configurations
tool_exec_dir: Optional[str] = None
tool_sandbox_timeout: float = 180

21
poetry.lock generated
View File

@@ -731,13 +731,13 @@ files = [
[[package]]
name = "certifi"
version = "2025.1.31"
version = "2025.6.15"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
python-versions = ">=3.7"
files = [
{file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
{file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
{file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"},
{file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"},
]
[[package]]
@@ -1537,18 +1537,19 @@ files = [
[[package]]
name = "firecrawl-py"
version = "1.17.0"
version = "2.8.0"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
files = [
{file = "firecrawl_py-1.17.0-py3-none-any.whl", hash = "sha256:0392822fbd906731f4c0876f91a9c3cce7624279c81948e4e3f8bc60b4e1c855"},
{file = "firecrawl_py-1.17.0.tar.gz", hash = "sha256:5e2f50ec1f0e67514cdf6f0afc7df6be36eb8277fbec9e1f5a283fc01fae7875"},
{file = "firecrawl_py-2.8.0-py3-none-any.whl", hash = "sha256:f2e148086aa1ca42f603a56009577b4f66a2c23893eaa71f7c9c0082b4fdcf60"},
{file = "firecrawl_py-2.8.0.tar.gz", hash = "sha256:657795b6ddd63f0bd38b38bf0571187e0a66becda23d97c032801895257403c9"},
]
[package.dependencies]
aiohttp = "*"
nest-asyncio = "*"
pydantic = ">=2.10.3"
pydantic = "*"
python-dotenv = "*"
requests = "*"
websockets = "*"
@@ -7385,7 +7386,7 @@ cloud-tool-sandbox = ["e2b-code-interpreter"]
desktop = ["docker", "fastapi", "langchain", "langchain-community", "locust", "pg8000", "pgvector", "psycopg2", "psycopg2-binary", "pyright", "uvicorn", "wikipedia"]
dev = ["autoflake", "black", "isort", "locust", "pexpect", "pre-commit", "pyright", "pytest-asyncio", "pytest-order"]
experimental = ["granian", "uvloop"]
external-tools = ["docker", "langchain", "langchain-community", "wikipedia"]
external-tools = ["docker", "firecrawl-py", "langchain", "langchain-community", "wikipedia"]
google = ["google-genai"]
postgres = ["asyncpg", "pg8000", "pgvector", "psycopg2", "psycopg2-binary"]
redis = ["redis"]
@@ -7395,4 +7396,4 @@ tests = ["wikipedia"]
[metadata]
lock-version = "2.0"
python-versions = "<3.14,>=3.10"
content-hash = "064797612dc82335ea4c5e68aa53535318970789007cc20ebc9bf32a646a03c1"
content-hash = "87b1d77da4ccba13d41d7b6ed9fe24302982e181f84ad93f0cb409f216e33255"

View File

@@ -85,7 +85,7 @@ marshmallow-sqlalchemy = "^1.4.1"
boto3 = {version = "^1.36.24", optional = true}
datamodel-code-generator = {extras = ["http"], version = "^0.25.0"}
mcp = {extras = ["cli"], version = "^1.9.4"}
firecrawl-py = "^1.15.0"
firecrawl-py = "^2.8.0"
apscheduler = "^3.11.0"
aiomultiprocess = "^0.9.1"
matplotlib = "^3.10.1"
@@ -97,6 +97,7 @@ uvloop = {version = "^0.21.0", optional = true}
granian = {version = "^2.3.2", extras = ["uvloop", "reload"], optional = true}
redis = {version = "^6.2.0", optional = true}
structlog = "^25.4.0"
certifi = "^2025.6.15"
[tool.poetry.extras]
@@ -106,7 +107,7 @@ dev = ["pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright",
experimental = ["uvloop", "granian"]
server = ["websockets", "fastapi", "uvicorn"]
cloud-tool-sandbox = ["e2b-code-interpreter"]
external-tools = ["docker", "langchain", "wikipedia", "langchain-community"]
external-tools = ["docker", "langchain", "wikipedia", "langchain-community", "firecrawl-py"]
tests = ["wikipedia"]
bedrock = ["boto3"]
google = ["google-genai"]

View File

@@ -13,6 +13,7 @@ from letta_client.types import ToolReturnMessage
from letta.schemas.agent import AgentState
from letta.schemas.llm_config import LLMConfig
from letta.settings import tool_settings
# ------------------------------
# Fixtures
@@ -69,24 +70,45 @@ def client(server_url: str) -> Letta:
def agent_state(client: Letta) -> AgentState:
    """
    Creates and returns an agent state for testing with a pre-configured agent.

    The agent is configured with the send_message, run_code, web_search, and
    firecrawl_search tools only (no other base tools).
    """
    client.tools.upsert_base_tools()

    send_message_tool = client.tools.list(name="send_message")[0]
    run_code_tool = client.tools.list(name="run_code")[0]
    web_search_tool = client.tools.list(name="web_search")[0]
    firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0]
    # NOTE: diff flattening had left both the pre- and post-change keyword
    # arguments in place (duplicate name=/tool_ids=/tags=), which is a syntax
    # error; only the post-change values are kept here.
    agent_state_instance = client.agents.create(
        name="test_builtin_tools_agent",
        include_base_tools=False,
        tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id],
        model="openai/gpt-4o",
        embedding="letta/letta-free",
        tags=["test_builtin_tools_agent"],
    )
    yield agent_state_instance

    # Teardown: remove the agent created for this test module.
    client.agents.delete(agent_state_instance.id)
@pytest.fixture(scope="module")
def agent_state_with_firecrawl_key(client: Letta) -> AgentState:
"""
Creates and returns an agent state for testing with a pre-configured agent.
"""
client.tools.upsert_base_tools()
send_message_tool = client.tools.list(name="send_message")[0]
run_code_tool = client.tools.list(name="run_code")[0]
web_search_tool = client.tools.list(name="web_search")[0]
firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0]
agent_state_instance = client.agents.create(
name="test_builtin_tools_agent",
include_base_tools=False,
tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id],
model="openai/gpt-4o",
embedding="letta/letta-free",
tags=["test_builtin_tools_agent"],
tool_exec_environment_variables={"FIRECRAWL_API_KEY": tool_settings.firecrawl_api_key},
)
yield agent_state_instance
# ------------------------------
@@ -200,3 +222,99 @@ def test_web_search(
returns = [m.tool_return for m in tool_returns]
expected = "RESULT 1:"
assert any(expected in ret for ret in returns), f"Expected to find '{expected}' in tool_return, " f"but got {returns!r}"
# Integration test: exercises the firecrawl_search builtin end-to-end against a
# live server (Firecrawl + OpenAI), using the system-settings API key path.
# NOTE(review): `llm_config` is parametrized but never applied to the agent in
# the body — confirm whether the agent's model is meant to vary per case.
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_firecrawl_search(
    client: Letta,
    agent_state: AgentState,
    llm_config: LLMConfig,
) -> None:
    """End-to-end check that the agent invokes firecrawl_search and returns the expected JSON structure."""
    user_message = MessageCreate(
        role="user",
        content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.",
        otid=USER_MESSAGE_OTID,
    )

    response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[user_message],
    )

    # Collect the tool-return messages emitted by the agent run.
    tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
    assert tool_returns, "No ToolReturnMessage found"

    returns = [m.tool_return for m in tool_returns]
    print(returns)

    # Parse the JSON response from firecrawl_search
    assert len(returns) > 0, "No tool returns found"
    response_json = json.loads(returns[0])

    # Basic structure assertions: mirror the schema built by _build_final_response.
    assert "query" in response_json, "Missing 'query' field in response"
    assert "question" in response_json, "Missing 'question' field in response"
    assert "total_sources" in response_json, "Missing 'total_sources' field in response"
    assert "total_citations" in response_json, "Missing 'total_citations' field in response"
    assert "sources" in response_json, "Missing 'sources' field in response"
    assert "api_key_source" in response_json, "Missing 'api_key_source' field in response"
    # This fixture's agent has no FIRECRAWL_API_KEY env var, so the key must
    # come from system settings.
    assert response_json["api_key_source"] == "system_settings"

    # Content assertions
    assert response_json["total_sources"] > 0, "Should have found at least one source"
    assert response_json["total_citations"] > 0, "Should have found at least one citation"
    assert len(response_json["sources"]) == response_json["total_sources"], "Sources count mismatch"

    # Verify we found information about Charles Packer's education
    found_education_info = False
    for source in response_json["sources"]:
        assert "url" in source, "Source missing URL"
        assert "title" in source, "Source missing title"
        assert "citations" in source, "Source missing citations"

        for citation in source["citations"]:
            assert "text" in citation, "Citation missing text"
            assert "thinking" in citation, "Citation missing thinking"

            # Check if we found education-related information
            if any(keyword in citation["text"].lower() for keyword in ["berkeley", "phd", "ph.d", "university", "student"]):
                found_education_info = True

    assert found_education_info, "Should have found education-related information about Charles Packer"

    # API key source should be valid
    assert response_json["api_key_source"] in [
        "agent_environment",
        "system_settings",
    ], f"Invalid api_key_source: {response_json['api_key_source']}"
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_firecrawl_search_using_agent_state_env_var(
client: Letta,
agent_state_with_firecrawl_key: AgentState,
llm_config: LLMConfig,
) -> None:
user_message = MessageCreate(
role="user",
content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.",
otid=USER_MESSAGE_OTID,
)
response = client.agents.messages.create(
agent_id=agent_state_with_firecrawl_key.id,
messages=[user_message],
)
tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
assert tool_returns, "No ToolReturnMessage found"
returns = [m.tool_return for m in tool_returns]
print(returns)
# Parse the JSON response from firecrawl_search
assert len(returns) > 0, "No tool returns found"
response_json = json.loads(returns[0])
# Basic structure assertions
assert response_json["api_key_source"] == "agent_environment"