diff --git a/letta/constants.py b/letta/constants.py index aab388bf..6b6ec50a 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -125,7 +125,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile( ) # Built in tools -BUILTIN_TOOLS = ["run_code", "web_search"] +BUILTIN_TOOLS = ["run_code", "web_search", "firecrawl_search"] # Built in tools FILES_TOOLS = ["open_file", "close_file", "grep", "search_files"] diff --git a/letta/functions/function_sets/builtin.py b/letta/functions/function_sets/builtin.py index c8d69568..46944a90 100644 --- a/letta/functions/function_sets/builtin.py +++ b/letta/functions/function_sets/builtin.py @@ -25,3 +25,32 @@ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> """ raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.") + + +async def firecrawl_search( + query: str, + question: str, + limit: int = 5, + return_raw: bool = False, +) -> str: + """ + Search the web with the `query` and extract passages that answer the provided `question`. + + Examples: + query -> "Tesla Q1 2025 earnings report PDF" + question -> "What was Tesla's net profit in Q1 2025?" + + query -> "Letta API prebuilt tools core_memory_append" + question -> "What does the core_memory_append tool do in Letta?" + + Args: + query (str): The raw web-search query. + question (str): The information goal to answer using the retrieved pages. Consider the context and intent of the conversation so far when forming the question. + limit (int, optional): Maximum number of URLs to fetch and analyse (must be > 0). Defaults to 5. + return_raw (bool, optional): If set to True, returns the raw content of the web page. This should be False unless otherwise specified by the user. Defaults to False. + + Returns: + str: A JSON-encoded string with one entry per source (URL, title, description, summary and + extracted citations), or the raw search results if `return_raw` is True or analysis is unavailable. 
+ """ + raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.") diff --git a/letta/functions/prompts.py b/letta/functions/prompts.py new file mode 100644 index 00000000..f9bac2a0 --- /dev/null +++ b/letta/functions/prompts.py @@ -0,0 +1,25 @@ +"""Prompts for Letta function tools.""" + +FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert information extraction assistant. Your task is to analyze a document and extract the most relevant passages that answer a specific question, based on a search query context. + +Guidelines: +1. Extract substantial, lengthy text snippets that directly address the question +2. Preserve important context and details in each snippet - err on the side of including more rather than less +3. Keep thinking very brief (1 short sentence) - focus on WHY the snippet is relevant, not WHAT it says +4. Include a concise summary of how the overall document relates to the question +5. Only extract snippets that actually answer or relate to the question - don't force relevance +6. Be comprehensive - include all relevant information, don't limit the number of snippets +7. 
Prioritize longer, information-rich passages over shorter ones""" + + +def get_firecrawl_search_user_prompt(query: str, question: str, markdown_content: str) -> str: + """Generate the user prompt for firecrawl search analysis.""" + return f"""Search Query: {query} +Question to Answer: {question} + +Document Content: +```markdown +{markdown_content} +``` + +Please analyze this document and extract all relevant passages that help answer the question.""" diff --git a/letta/services/tool_executor/builtin_tool_executor.py b/letta/services/tool_executor/builtin_tool_executor.py index cc320a47..d8abe298 100644 --- a/letta/services/tool_executor/builtin_tool_executor.py +++ b/letta/services/tool_executor/builtin_tool_executor.py @@ -1,8 +1,13 @@ +import asyncio import json from textwrap import shorten -from typing import Any, Dict, Literal, Optional +from typing import Any, Dict, List, Literal, Optional + +from pydantic import BaseModel from letta.constants import WEB_SEARCH_CLIP_CONTENT, WEB_SEARCH_INCLUDE_SCORE, WEB_SEARCH_SEPARATOR +from letta.functions.prompts import FIRECRAWL_SEARCH_SYSTEM_PROMPT, get_firecrawl_search_user_prompt +from letta.log import get_logger from letta.otel.tracing import trace_method from letta.schemas.agent import AgentState from letta.schemas.sandbox_config import SandboxConfig @@ -10,7 +15,23 @@ from letta.schemas.tool import Tool from letta.schemas.tool_execution_result import ToolExecutionResult from letta.schemas.user import User from letta.services.tool_executor.tool_executor_base import ToolExecutor -from letta.settings import tool_settings +from letta.settings import model_settings, tool_settings + +logger = get_logger(__name__) + + +class Citation(BaseModel): + """A relevant text snippet extracted from a document.""" + + text: str + thinking: str # Reasoning of why this snippet is relevant + + +class DocumentAnalysis(BaseModel): + """Analysis of a document's relevance to a search question.""" + + citations: List[Citation] + summary: 
str # Brief summary of how this document relates to the question class LettaBuiltinToolExecutor(ToolExecutor): @@ -27,14 +48,14 @@ class LettaBuiltinToolExecutor(ToolExecutor): sandbox_config: Optional[SandboxConfig] = None, sandbox_env_vars: Optional[Dict[str, Any]] = None, ) -> ToolExecutionResult: - function_map = {"run_code": self.run_code, "web_search": self.web_search} + function_map = {"run_code": self.run_code, "web_search": self.web_search, "firecrawl_search": self.firecrawl_search} if function_name not in function_map: raise ValueError(f"Unknown function: {function_name}") # Execute the appropriate function function_args_copy = function_args.copy() # Make a copy to avoid modifying the original - function_response = await function_map[function_name](**function_args_copy) + function_response = await function_map[function_name](agent_state=agent_state, **function_args_copy) return ToolExecutionResult( status="success", @@ -42,7 +63,7 @@ class LettaBuiltinToolExecutor(ToolExecutor): agent_state=agent_state, ) - async def run_code(self, code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str: + async def run_code(self, agent_state: "AgentState", code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str: from e2b_code_interpreter import AsyncSandbox if tool_settings.e2b_api_key is None: @@ -70,7 +91,7 @@ class LettaBuiltinToolExecutor(ToolExecutor): out["error"] = err return out - async def web_search(agent_state: "AgentState", query: str) -> str: + async def web_search(self, agent_state: "AgentState", query: str) -> str: """ Search the web for information. 
Args: @@ -115,3 +136,176 @@ class LettaBuiltinToolExecutor(ToolExecutor): formatted_blocks.append(block) return WEB_SEARCH_SEPARATOR.join(formatted_blocks) + + async def firecrawl_search( + self, + agent_state: "AgentState", + query: str, + question: str, + limit: int = 5, + return_raw: bool = False, + ) -> str: + """ + Search the web with the `query` and extract passages that answer the provided `question`. + + Examples: + query -> "Tesla Q1 2025 earnings report PDF" + question -> "What was Tesla's net profit in Q1 2025?" + + query -> "Letta API prebuilt tools core_memory_append" + question -> "What does the core_memory_append tool do in Letta?" + + Args: + query (str): The raw web-search query. + question (str): The information goal to answer using the retrieved pages. + limit (int, optional): Maximum number of URLs to fetch and analyse (must be > 0). Defaults to 5. + return_raw (bool, optional): If set to True, returns the raw content of the web page. This should be False unless otherwise specified by the user. Defaults to False. + + Returns: + str: A JSON-encoded string with one entry per source (URL, title, description, summary and + extracted citations), or the raw search results if `return_raw` is True or analysis is unavailable. 
+ """ + try: + from firecrawl import AsyncFirecrawlApp, ScrapeOptions + except ImportError: + raise ImportError("firecrawl-py is not installed in the tool execution environment") + + # Check if the API key exists on the agent state + agent_state_tool_env_vars = agent_state.get_agent_env_vars_as_dict() + firecrawl_api_key = agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") or tool_settings.firecrawl_api_key + if not firecrawl_api_key: + raise ValueError("FIRECRAWL_API_KEY is not set in environment or on agent_state tool exec environment variables.") + + # Track which API key source was used + api_key_source = "agent_environment" if agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") else "system_settings" + + if limit <= 0: + raise ValueError("limit must be greater than 0") + + # Initialize Firecrawl client + app = AsyncFirecrawlApp(api_key=firecrawl_api_key) + + # Perform the search, just request markdown + search_result = await app.search(query, limit=limit, scrape_options=ScrapeOptions(formats=["markdown"])) + + if not search_result or not search_result.get("data"): + return json.dumps({"error": "No search results found."}) + + # Check if OpenAI API key is available for semantic parsing + if not return_raw and model_settings.openai_api_key: + try: + from openai import AsyncOpenAI + + # Initialize OpenAI client + client = AsyncOpenAI( + api_key=model_settings.openai_api_key, + ) + + # Process each result with OpenAI concurrently + analysis_tasks = [] + results_with_markdown = [] + results_without_markdown = [] + + for result in search_result.get("data"): + if result.get("markdown"): + # Create async task for OpenAI analysis + task = self._analyze_document_with_openai(client, result["markdown"], query, question) + analysis_tasks.append(task) + results_with_markdown.append(result) + else: + results_without_markdown.append(result) + + # Fire off all OpenAI requests concurrently + analyses = await asyncio.gather(*analysis_tasks, return_exceptions=True) + + # Build 
processed results + processed_results = [] + + # Check if any analysis failed - if so, fall back to raw results + for result, analysis in zip(results_with_markdown, analyses): + if isinstance(analysis, Exception) or analysis is None: + logger.error(f"Analysis failed for {result.get('url')}, falling back to raw results") + return search_result.model_dump_json(exclude_none=True) + + # All analyses succeeded, build processed results + for result, analysis in zip(results_with_markdown, analyses): + processed_results.append( + { + "url": result.get("url"), + "title": result.get("title"), + "description": result.get("description"), + "analysis": analysis.model_dump() if analysis else None, + } + ) + + # Add results without markdown + for result in results_without_markdown: + processed_results.append( + {"url": result.get("url"), "title": result.get("title"), "description": result.get("description"), "analysis": None} + ) + + # Concatenate all relevant snippets into a final response + final_response = self._build_final_response(processed_results, query, question, api_key_source) + return final_response + except Exception as e: + # Log error but continue with raw results + logger.error(f"Error with OpenAI processing: {e}") + + # Return raw search results if OpenAI processing isn't available or fails + return search_result.model_dump_json(exclude_none=True) + + async def _analyze_document_with_openai(self, client, markdown_content: str, query: str, question: str) -> Optional[DocumentAnalysis]: + """Use OpenAI to analyze a document and extract relevant passages.""" + max_content_length = 200000 # GPT-4.1 has ~1M token context window, so we can be more generous with content length + if len(markdown_content) > max_content_length: + markdown_content = markdown_content[:max_content_length] + "..." 
+ + user_prompt = get_firecrawl_search_user_prompt(query, question, markdown_content) + + response = await client.beta.chat.completions.parse( + model="gpt-4.1-mini-2025-04-14", + messages=[{"role": "system", "content": FIRECRAWL_SEARCH_SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}], + response_format=DocumentAnalysis, + temperature=0.1, + ) + + return response.choices[0].message.parsed + + def _build_final_response(self, processed_results: List[Dict], query: str, question: str, api_key_source: str = None) -> str: + """Build the final JSON response from all processed results.""" + + # Build sources array + sources = [] + total_snippets = 0 + + for result in processed_results: + source = {"url": result.get("url"), "title": result.get("title"), "description": result.get("description")} + + if result.get("analysis") and result["analysis"].get("citations"): + analysis = result["analysis"] + source["summary"] = analysis.get("summary") + source["citations"] = analysis["citations"] + total_snippets += len(analysis["citations"]) + else: + source["summary"] = "No relevant information found to answer the question" + source["citations"] = [] + + sources.append(source) + + # Build final response structure + response = { + "query": query, + "question": question, + "total_sources": len(sources), + "total_citations": total_snippets, + "sources": sources, + } + + # Add API key source if provided + if api_key_source: + response["api_key_source"] = api_key_source + + if total_snippets == 0: + response["message"] = "No relevant passages found that directly answer the question." 
+ + return json.dumps(response, indent=2, ensure_ascii=False) diff --git a/letta/settings.py b/letta/settings.py index 67c45b0e..c97ea4c8 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -18,6 +18,9 @@ class ToolSettings(BaseSettings): # Tavily search tavily_api_key: Optional[str] = None + # Firecrawl search + firecrawl_api_key: Optional[str] = None + # Local Sandbox configurations tool_exec_dir: Optional[str] = None tool_sandbox_timeout: float = 180 diff --git a/poetry.lock b/poetry.lock index c45721db..4886d3e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -731,13 +731,13 @@ files = [ [[package]] name = "certifi" -version = "2025.1.31" +version = "2025.6.15" description = "Python package for providing Mozilla's CA Bundle." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, - {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, + {file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"}, + {file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"}, ] [[package]] @@ -1537,18 +1537,19 @@ files = [ [[package]] name = "firecrawl-py" -version = "1.17.0" +version = "2.8.0" description = "Python SDK for Firecrawl API" optional = false python-versions = ">=3.8" files = [ - {file = "firecrawl_py-1.17.0-py3-none-any.whl", hash = "sha256:0392822fbd906731f4c0876f91a9c3cce7624279c81948e4e3f8bc60b4e1c855"}, - {file = "firecrawl_py-1.17.0.tar.gz", hash = "sha256:5e2f50ec1f0e67514cdf6f0afc7df6be36eb8277fbec9e1f5a283fc01fae7875"}, + {file = "firecrawl_py-2.8.0-py3-none-any.whl", hash = "sha256:f2e148086aa1ca42f603a56009577b4f66a2c23893eaa71f7c9c0082b4fdcf60"}, + {file = "firecrawl_py-2.8.0.tar.gz", hash = 
"sha256:657795b6ddd63f0bd38b38bf0571187e0a66becda23d97c032801895257403c9"}, ] [package.dependencies] +aiohttp = "*" nest-asyncio = "*" -pydantic = ">=2.10.3" +pydantic = "*" python-dotenv = "*" requests = "*" websockets = "*" @@ -7385,7 +7386,7 @@ cloud-tool-sandbox = ["e2b-code-interpreter"] desktop = ["docker", "fastapi", "langchain", "langchain-community", "locust", "pg8000", "pgvector", "psycopg2", "psycopg2-binary", "pyright", "uvicorn", "wikipedia"] dev = ["autoflake", "black", "isort", "locust", "pexpect", "pre-commit", "pyright", "pytest-asyncio", "pytest-order"] experimental = ["granian", "uvloop"] -external-tools = ["docker", "langchain", "langchain-community", "wikipedia"] +external-tools = ["docker", "firecrawl-py", "langchain", "langchain-community", "wikipedia"] google = ["google-genai"] postgres = ["asyncpg", "pg8000", "pgvector", "psycopg2", "psycopg2-binary"] redis = ["redis"] @@ -7395,4 +7396,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.0" python-versions = "<3.14,>=3.10" -content-hash = "064797612dc82335ea4c5e68aa53535318970789007cc20ebc9bf32a646a03c1" +content-hash = "87b1d77da4ccba13d41d7b6ed9fe24302982e181f84ad93f0cb409f216e33255" diff --git a/pyproject.toml b/pyproject.toml index 6c53837a..a575de79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ marshmallow-sqlalchemy = "^1.4.1" boto3 = {version = "^1.36.24", optional = true} datamodel-code-generator = {extras = ["http"], version = "^0.25.0"} mcp = {extras = ["cli"], version = "^1.9.4"} -firecrawl-py = "^1.15.0" +firecrawl-py = "^2.8.0" apscheduler = "^3.11.0" aiomultiprocess = "^0.9.1" matplotlib = "^3.10.1" @@ -97,6 +97,7 @@ uvloop = {version = "^0.21.0", optional = true} granian = {version = "^2.3.2", extras = ["uvloop", "reload"], optional = true} redis = {version = "^6.2.0", optional = true} structlog = "^25.4.0" +certifi = "^2025.6.15" [tool.poetry.extras] @@ -106,7 +107,7 @@ dev = ["pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright", 
experimental = ["uvloop", "granian"] server = ["websockets", "fastapi", "uvicorn"] cloud-tool-sandbox = ["e2b-code-interpreter"] -external-tools = ["docker", "langchain", "wikipedia", "langchain-community"] +external-tools = ["docker", "langchain", "wikipedia", "langchain-community", "firecrawl-py"] tests = ["wikipedia"] bedrock = ["boto3"] google = ["google-genai"] diff --git a/tests/integration_test_builtin_tools.py b/tests/integration_test_builtin_tools.py index ebb09d03..d43a8510 100644 --- a/tests/integration_test_builtin_tools.py +++ b/tests/integration_test_builtin_tools.py @@ -13,6 +13,7 @@ from letta_client.types import ToolReturnMessage from letta.schemas.agent import AgentState from letta.schemas.llm_config import LLMConfig +from letta.settings import tool_settings # ------------------------------ # Fixtures @@ -69,24 +70,45 @@ def client(server_url: str) -> Letta: def agent_state(client: Letta) -> AgentState: """ Creates and returns an agent state for testing with a pre-configured agent. - The agent is named 'supervisor' and is configured with base tools and the roll_dice tool. 
""" client.tools.upsert_base_tools() send_message_tool = client.tools.list(name="send_message")[0] run_code_tool = client.tools.list(name="run_code")[0] web_search_tool = client.tools.list(name="web_search")[0] + firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0] agent_state_instance = client.agents.create( - name="supervisor", + name="test_builtin_tools_agent", include_base_tools=False, - tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id], + tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id], model="openai/gpt-4o", embedding="letta/letta-free", - tags=["supervisor"], + tags=["test_builtin_tools_agent"], ) yield agent_state_instance - client.agents.delete(agent_state_instance.id) + +@pytest.fixture(scope="module") +def agent_state_with_firecrawl_key(client: Letta) -> AgentState: + """ + Creates and returns an agent state for testing with a pre-configured agent. + """ + client.tools.upsert_base_tools() + + send_message_tool = client.tools.list(name="send_message")[0] + run_code_tool = client.tools.list(name="run_code")[0] + web_search_tool = client.tools.list(name="web_search")[0] + firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0] + agent_state_instance = client.agents.create( + name="test_builtin_tools_agent", + include_base_tools=False, + tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id], + model="openai/gpt-4o", + embedding="letta/letta-free", + tags=["test_builtin_tools_agent"], + tool_exec_environment_variables={"FIRECRAWL_API_KEY": tool_settings.firecrawl_api_key}, + ) + yield agent_state_instance # ------------------------------ @@ -200,3 +222,99 @@ def test_web_search( returns = [m.tool_return for m in tool_returns] expected = "RESULT 1:" assert any(expected in ret for ret in returns), f"Expected to find '{expected}' in tool_return, " f"but got {returns!r}" + + +@pytest.mark.parametrize("llm_config", 
TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS]) +def test_firecrawl_search( + client: Letta, + agent_state: AgentState, + llm_config: LLMConfig, +) -> None: + user_message = MessageCreate( + role="user", + content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.", + otid=USER_MESSAGE_OTID, + ) + + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[user_message], + ) + + tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)] + assert tool_returns, "No ToolReturnMessage found" + + returns = [m.tool_return for m in tool_returns] + print(returns) + + # Parse the JSON response from firecrawl_search + assert len(returns) > 0, "No tool returns found" + response_json = json.loads(returns[0]) + + # Basic structure assertions + assert "query" in response_json, "Missing 'query' field in response" + assert "question" in response_json, "Missing 'question' field in response" + assert "total_sources" in response_json, "Missing 'total_sources' field in response" + assert "total_citations" in response_json, "Missing 'total_citations' field in response" + assert "sources" in response_json, "Missing 'sources' field in response" + assert "api_key_source" in response_json, "Missing 'api_key_source' field in response" + assert response_json["api_key_source"] == "system_settings" + + # Content assertions + assert response_json["total_sources"] > 0, "Should have found at least one source" + assert response_json["total_citations"] > 0, "Should have found at least one citation" + assert len(response_json["sources"]) == response_json["total_sources"], "Sources count mismatch" + + # Verify we found information about Charles Packer's education + found_education_info = False + for source in response_json["sources"]: + assert "url" in source, "Source missing URL" + assert "title" in source, "Source missing title" + assert "citations" in source, "Source 
missing citations" + + for citation in source["citations"]: + assert "text" in citation, "Citation missing text" + assert "thinking" in citation, "Citation missing thinking" + + # Check if we found education-related information + if any(keyword in citation["text"].lower() for keyword in ["berkeley", "phd", "ph.d", "university", "student"]): + found_education_info = True + + assert found_education_info, "Should have found education-related information about Charles Packer" + + # API key source should be valid + assert response_json["api_key_source"] in [ + "agent_environment", + "system_settings", + ], f"Invalid api_key_source: {response_json['api_key_source']}" + + +@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS]) +def test_firecrawl_search_using_agent_state_env_var( + client: Letta, + agent_state_with_firecrawl_key: AgentState, + llm_config: LLMConfig, +) -> None: + user_message = MessageCreate( + role="user", + content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.", + otid=USER_MESSAGE_OTID, + ) + + response = client.agents.messages.create( + agent_id=agent_state_with_firecrawl_key.id, + messages=[user_message], + ) + + tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)] + assert tool_returns, "No ToolReturnMessage found" + + returns = [m.tool_return for m in tool_returns] + print(returns) + + # Parse the JSON response from firecrawl_search + assert len(returns) > 0, "No tool returns found" + response_json = json.loads(returns[0]) + + # Basic structure assertions + assert response_json["api_key_source"] == "agent_environment"