feat: Add built in firecrawl search tool (#2858)

This commit is contained in:
Matthew Zhou
2025-06-17 01:16:39 -07:00
committed by GitHub
parent 65530e8380
commit d991d37b04
8 changed files with 395 additions and 24 deletions

View File

@@ -125,7 +125,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
)
# Built in tools
# NOTE: diff flattening left both the old and new assignment in place; only the
# post-change value (which includes "firecrawl_search") is kept.
BUILTIN_TOOLS = ["run_code", "web_search", "firecrawl_search"]
# Built in file tools
FILES_TOOLS = ["open_file", "close_file", "grep", "search_files"]

View File

@@ -25,3 +25,32 @@ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) ->
"""
raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
async def firecrawl_search(
    query: str,
    question: str,
    limit: int = 5,
    return_raw: bool = False,
) -> str:
    """Run a web search for ``query`` and pull out passages that answer ``question``.

    Example usage:
        query    -> "Tesla Q1 2025 earnings report PDF"
        question -> "What was Tesla's net profit in Q1 2025?"

        query    -> "Letta API prebuilt tools core_memory_append"
        question -> "What does the core_memory_append tool do in Letta?"

    Args:
        query (str): The raw web-search query string.
        question (str): The information goal the retrieved pages should answer;
            phrase it with the context and intent of the conversation so far in mind.
        limit (int, optional): Maximum number of URLs to fetch and analyse
            (must be > 0). Defaults to 5.
        return_raw (bool, optional): When True, return the raw page content instead
            of extracted passages. Keep False unless the user asks otherwise.
            Defaults to False.

    Returns:
        str: A JSON-encoded string containing ranked snippets with their source
            URLs and relevance scores.
    """
    # Stub only: the real implementation runs server-side in the builtin tool executor.
    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")

View File

@@ -0,0 +1,25 @@
"""Prompts for Letta function tools."""
FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert information extraction assistant. Your task is to analyze a document and extract the most relevant passages that answer a specific question, based on a search query context.
Guidelines:
1. Extract substantial, lengthy text snippets that directly address the question
2. Preserve important context and details in each snippet - err on the side of including more rather than less
3. Keep thinking very brief (1 short sentence) - focus on WHY the snippet is relevant, not WHAT it says
4. Include a concise summary of how the overall document relates to the question
5. Only extract snippets that actually answer or relate to the question - don't force relevance
6. Be comprehensive - include all relevant information, don't limit the number of snippets
7. Prioritize longer, information-rich passages over shorter ones"""
def get_firecrawl_search_user_prompt(query: str, question: str, markdown_content: str) -> str:
    """Build the user-turn prompt for the firecrawl search analysis call.

    Interpolates the search query, the question to answer, and the scraped
    markdown document into a single newline-joined prompt string.
    """
    sections = (
        f"Search Query: {query}",
        f"Question to Answer: {question}",
        "Document Content:",
        "```markdown",
        markdown_content,
        "```",
        "Please analyze this document and extract all relevant passages that help answer the question.",
    )
    return "\n".join(sections)

View File

@@ -1,8 +1,13 @@
import asyncio
import json
from textwrap import shorten
from typing import Any, Dict, Literal, Optional
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel
from letta.constants import WEB_SEARCH_CLIP_CONTENT, WEB_SEARCH_INCLUDE_SCORE, WEB_SEARCH_SEPARATOR
from letta.functions.prompts import FIRECRAWL_SEARCH_SYSTEM_PROMPT, get_firecrawl_search_user_prompt
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
@@ -10,7 +15,23 @@ from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.settings import tool_settings
from letta.settings import model_settings, tool_settings
logger = get_logger(__name__)
class Citation(BaseModel):
    """A relevant text snippet extracted from a document."""

    # Verbatim passage pulled from the scraped page content.
    text: str
    # Reasoning of why this snippet is relevant (kept to one short sentence by the system prompt).
    thinking: str
class DocumentAnalysis(BaseModel):
    """Analysis of a document's relevance to a search question.

    Used as the structured-output schema for the OpenAI extraction call, and
    serialized into the firecrawl_search JSON response.
    """

    # All relevant snippets found in the document (may be empty).
    citations: List[Citation]
    # Brief summary of how this document relates to the question
    summary: str
class LettaBuiltinToolExecutor(ToolExecutor):
@@ -27,14 +48,14 @@ class LettaBuiltinToolExecutor(ToolExecutor):
sandbox_config: Optional[SandboxConfig] = None,
sandbox_env_vars: Optional[Dict[str, Any]] = None,
) -> ToolExecutionResult:
function_map = {"run_code": self.run_code, "web_search": self.web_search}
function_map = {"run_code": self.run_code, "web_search": self.web_search, "firecrawl_search": self.firecrawl_search}
if function_name not in function_map:
raise ValueError(f"Unknown function: {function_name}")
# Execute the appropriate function
function_args_copy = function_args.copy() # Make a copy to avoid modifying the original
function_response = await function_map[function_name](**function_args_copy)
function_response = await function_map[function_name](agent_state=agent_state, **function_args_copy)
return ToolExecutionResult(
status="success",
@@ -42,7 +63,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
agent_state=agent_state,
)
async def run_code(self, code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
async def run_code(self, agent_state: "AgentState", code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
from e2b_code_interpreter import AsyncSandbox
if tool_settings.e2b_api_key is None:
@@ -70,7 +91,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
out["error"] = err
return out
async def web_search(agent_state: "AgentState", query: str) -> str:
async def web_search(self, agent_state: "AgentState", query: str) -> str:
"""
Search the web for information.
Args:
@@ -115,3 +136,176 @@ class LettaBuiltinToolExecutor(ToolExecutor):
formatted_blocks.append(block)
return WEB_SEARCH_SEPARATOR.join(formatted_blocks)
async def firecrawl_search(
    self,
    agent_state: "AgentState",
    query: str,
    question: str,
    limit: int = 5,
    return_raw: bool = False,
) -> str:
    """
    Search the web with the `query` and extract passages that answer the provided `question`.

    Examples:
        query -> "Tesla Q1 2025 earnings report PDF"
        question -> "What was Tesla's net profit in Q1 2025?"

        query -> "Letta API prebuilt tools core_memory_append"
        question -> "What does the core_memory_append tool do in Letta?"

    Args:
        query (str): The raw web-search query.
        question (str): The information goal to answer using the retrieved pages.
        limit (int, optional): Maximum number of URLs to fetch and analyse (must be > 0). Defaults to 5.
        return_raw (bool, optional): If set to True, returns the raw content of the web page. This should be False unless otherwise specified by the user. Defaults to False.

    Returns:
        str: A JSON-encoded string containing ranked snippets with their source
             URLs and relevance scores.

    Raises:
        ImportError: if firecrawl-py is not installed in the execution environment.
        ValueError: if no API key can be resolved, or limit <= 0.
    """
    # Import lazily so environments without the optional dependency can still load this module.
    try:
        from firecrawl import AsyncFirecrawlApp, ScrapeOptions
    except ImportError:
        raise ImportError("firecrawl-py is not installed in the tool execution environment")

    # Check if the API key exists on the agent state; the per-agent env var
    # takes precedence over the system-wide tool_settings value.
    agent_state_tool_env_vars = agent_state.get_agent_env_vars_as_dict()
    firecrawl_api_key = agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") or tool_settings.firecrawl_api_key
    if not firecrawl_api_key:
        raise ValueError("FIRECRAWL_API_KEY is not set in environment or on agent_state tool exec environment variables.")

    # Track which API key source was used (surfaced in the final JSON for observability).
    api_key_source = "agent_environment" if agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") else "system_settings"

    if limit <= 0:
        raise ValueError("limit must be greater than 0")

    # Initialize Firecrawl client
    app = AsyncFirecrawlApp(api_key=firecrawl_api_key)

    # Perform the search, just request markdown (no other scrape formats).
    # NOTE(review): search_result is used both via .get(...) and .model_dump_json(...)
    # below — assumes the firecrawl response object supports both; confirm against
    # the pinned firecrawl-py version.
    search_result = await app.search(query, limit=limit, scrape_options=ScrapeOptions(formats=["markdown"]))

    if not search_result or not search_result.get("data"):
        return json.dumps({"error": "No search results found."})

    # Check if OpenAI API key is available for semantic parsing; otherwise fall
    # through to returning the raw firecrawl payload.
    if not return_raw and model_settings.openai_api_key:
        try:
            from openai import AsyncOpenAI

            # Initialize OpenAI client
            client = AsyncOpenAI(
                api_key=model_settings.openai_api_key,
            )

            # Partition results: only pages with scraped markdown get an analysis pass.
            analysis_tasks = []
            results_with_markdown = []
            results_without_markdown = []

            for result in search_result.get("data"):
                if result.get("markdown"):
                    # Create async task for OpenAI analysis
                    task = self._analyze_document_with_openai(client, result["markdown"], query, question)
                    analysis_tasks.append(task)
                    results_with_markdown.append(result)
                else:
                    results_without_markdown.append(result)

            # Fire off all OpenAI requests concurrently; exceptions are captured
            # per-task rather than cancelling the gather.
            analyses = await asyncio.gather(*analysis_tasks, return_exceptions=True)

            # Build processed results
            processed_results = []

            # Check if any analysis failed - if so, fall back to raw results
            # (all-or-nothing: a single failure abandons semantic parsing entirely).
            for result, analysis in zip(results_with_markdown, analyses):
                if isinstance(analysis, Exception) or analysis is None:
                    logger.error(f"Analysis failed for {result.get('url')}, falling back to raw results")
                    return search_result.model_dump_json(exclude_none=True)

            # All analyses succeeded, build processed results
            for result, analysis in zip(results_with_markdown, analyses):
                processed_results.append(
                    {
                        "url": result.get("url"),
                        "title": result.get("title"),
                        "description": result.get("description"),
                        "analysis": analysis.model_dump() if analysis else None,
                    }
                )

            # Add results without markdown (no analysis possible for these).
            for result in results_without_markdown:
                processed_results.append(
                    {"url": result.get("url"), "title": result.get("title"), "description": result.get("description"), "analysis": None}
                )

            # Concatenate all relevant snippets into a final response
            final_response = self._build_final_response(processed_results, query, question, api_key_source)
            return final_response

        except Exception as e:
            # Log error but continue with raw results
            logger.error(f"Error with OpenAI processing: {e}")

    # Return raw search results if OpenAI processing isn't available or fails
    return search_result.model_dump_json(exclude_none=True)
async def _analyze_document_with_openai(
    self,
    client,
    markdown_content: str,
    query: str,
    question: str,
    model: str = "gpt-4.1-mini-2025-04-14",
) -> Optional[DocumentAnalysis]:
    """Use OpenAI to analyze a document and extract relevant passages.

    Args:
        client: An AsyncOpenAI client instance.
        markdown_content (str): The scraped page content (markdown); truncated
            if it exceeds the safety cap below.
        query (str): The original web-search query (gives the model context).
        question (str): The question the extracted passages should answer.
        model (str, optional): OpenAI model used for the structured extraction.
            Defaults to "gpt-4.1-mini-2025-04-14"; parameterized so callers can
            swap models without editing this method.

    Returns:
        Optional[DocumentAnalysis]: The parsed structured output, or None if the
        API returned no parsed message (e.g. a refusal).
    """
    # Safety cap on prompt size; the model's context window is large (~1M tokens),
    # so we can be generous with content length.
    max_content_length = 200000
    if len(markdown_content) > max_content_length:
        markdown_content = markdown_content[:max_content_length] + "..."

    user_prompt = get_firecrawl_search_user_prompt(query, question, markdown_content)

    # Structured-output call: response is validated against DocumentAnalysis.
    response = await client.beta.chat.completions.parse(
        model=model,
        messages=[{"role": "system", "content": FIRECRAWL_SEARCH_SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}],
        response_format=DocumentAnalysis,
        # Low temperature: extraction should be deterministic, not creative.
        temperature=0.1,
    )
    return response.choices[0].message.parsed
def _build_final_response(self, processed_results: List[Dict], query: str, question: str, api_key_source: str = None) -> str:
"""Build the final JSON response from all processed results."""
# Build sources array
sources = []
total_snippets = 0
for result in processed_results:
source = {"url": result.get("url"), "title": result.get("title"), "description": result.get("description")}
if result.get("analysis") and result["analysis"].get("citations"):
analysis = result["analysis"]
source["summary"] = analysis.get("summary")
source["citations"] = analysis["citations"]
total_snippets += len(analysis["citations"])
else:
source["summary"] = "No relevant information found to answer the question"
source["citations"] = []
sources.append(source)
# Build final response structure
response = {
"query": query,
"question": question,
"total_sources": len(sources),
"total_citations": total_snippets,
"sources": sources,
}
# Add API key source if provided
if api_key_source:
response["api_key_source"] = api_key_source
if total_snippets == 0:
response["message"] = "No relevant passages found that directly answer the question."
return json.dumps(response, indent=2, ensure_ascii=False)

View File

@@ -18,6 +18,9 @@ class ToolSettings(BaseSettings):
# Tavily search
tavily_api_key: Optional[str] = None
# Firecrawl search
firecrawl_api_key: Optional[str] = None
# Local Sandbox configurations
tool_exec_dir: Optional[str] = None
tool_sandbox_timeout: float = 180

21
poetry.lock generated
View File

@@ -731,13 +731,13 @@ files = [
[[package]]
name = "certifi"
version = "2025.1.31"
version = "2025.6.15"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
python-versions = ">=3.7"
files = [
{file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
{file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
{file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"},
{file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"},
]
[[package]]
@@ -1537,18 +1537,19 @@ files = [
[[package]]
name = "firecrawl-py"
version = "1.17.0"
version = "2.8.0"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
files = [
{file = "firecrawl_py-1.17.0-py3-none-any.whl", hash = "sha256:0392822fbd906731f4c0876f91a9c3cce7624279c81948e4e3f8bc60b4e1c855"},
{file = "firecrawl_py-1.17.0.tar.gz", hash = "sha256:5e2f50ec1f0e67514cdf6f0afc7df6be36eb8277fbec9e1f5a283fc01fae7875"},
{file = "firecrawl_py-2.8.0-py3-none-any.whl", hash = "sha256:f2e148086aa1ca42f603a56009577b4f66a2c23893eaa71f7c9c0082b4fdcf60"},
{file = "firecrawl_py-2.8.0.tar.gz", hash = "sha256:657795b6ddd63f0bd38b38bf0571187e0a66becda23d97c032801895257403c9"},
]
[package.dependencies]
aiohttp = "*"
nest-asyncio = "*"
pydantic = ">=2.10.3"
pydantic = "*"
python-dotenv = "*"
requests = "*"
websockets = "*"
@@ -7385,7 +7386,7 @@ cloud-tool-sandbox = ["e2b-code-interpreter"]
desktop = ["docker", "fastapi", "langchain", "langchain-community", "locust", "pg8000", "pgvector", "psycopg2", "psycopg2-binary", "pyright", "uvicorn", "wikipedia"]
dev = ["autoflake", "black", "isort", "locust", "pexpect", "pre-commit", "pyright", "pytest-asyncio", "pytest-order"]
experimental = ["granian", "uvloop"]
external-tools = ["docker", "langchain", "langchain-community", "wikipedia"]
external-tools = ["docker", "firecrawl-py", "langchain", "langchain-community", "wikipedia"]
google = ["google-genai"]
postgres = ["asyncpg", "pg8000", "pgvector", "psycopg2", "psycopg2-binary"]
redis = ["redis"]
@@ -7395,4 +7396,4 @@ tests = ["wikipedia"]
[metadata]
lock-version = "2.0"
python-versions = "<3.14,>=3.10"
content-hash = "064797612dc82335ea4c5e68aa53535318970789007cc20ebc9bf32a646a03c1"
content-hash = "87b1d77da4ccba13d41d7b6ed9fe24302982e181f84ad93f0cb409f216e33255"

View File

@@ -85,7 +85,7 @@ marshmallow-sqlalchemy = "^1.4.1"
boto3 = {version = "^1.36.24", optional = true}
datamodel-code-generator = {extras = ["http"], version = "^0.25.0"}
mcp = {extras = ["cli"], version = "^1.9.4"}
firecrawl-py = "^1.15.0"
firecrawl-py = "^2.8.0"
apscheduler = "^3.11.0"
aiomultiprocess = "^0.9.1"
matplotlib = "^3.10.1"
@@ -97,6 +97,7 @@ uvloop = {version = "^0.21.0", optional = true}
granian = {version = "^2.3.2", extras = ["uvloop", "reload"], optional = true}
redis = {version = "^6.2.0", optional = true}
structlog = "^25.4.0"
certifi = "^2025.6.15"
[tool.poetry.extras]
@@ -106,7 +107,7 @@ dev = ["pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright",
experimental = ["uvloop", "granian"]
server = ["websockets", "fastapi", "uvicorn"]
cloud-tool-sandbox = ["e2b-code-interpreter"]
external-tools = ["docker", "langchain", "wikipedia", "langchain-community"]
external-tools = ["docker", "langchain", "wikipedia", "langchain-community", "firecrawl-py"]
tests = ["wikipedia"]
bedrock = ["boto3"]
google = ["google-genai"]

View File

@@ -13,6 +13,7 @@ from letta_client.types import ToolReturnMessage
from letta.schemas.agent import AgentState
from letta.schemas.llm_config import LLMConfig
from letta.settings import tool_settings
# ------------------------------
# Fixtures
@@ -69,24 +70,45 @@ def client(server_url: str) -> Letta:
def agent_state(client: Letta) -> AgentState:
    """
    Creates and returns an agent state for testing with a pre-configured agent.

    The agent is configured with the send_message, run_code, web_search, and
    firecrawl_search tools only (no other base tools).
    """
    client.tools.upsert_base_tools()

    send_message_tool = client.tools.list(name="send_message")[0]
    run_code_tool = client.tools.list(name="run_code")[0]
    web_search_tool = client.tools.list(name="web_search")[0]
    firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0]
    # NOTE: diff flattening had left both the pre- and post-change keyword
    # arguments in place (duplicate name=/tool_ids=/tags=), which is a syntax
    # error; only the post-change values are kept here.
    agent_state_instance = client.agents.create(
        name="test_builtin_tools_agent",
        include_base_tools=False,
        tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id],
        model="openai/gpt-4o",
        embedding="letta/letta-free",
        tags=["test_builtin_tools_agent"],
    )
    yield agent_state_instance

    # Teardown: remove the agent created for this test module.
    client.agents.delete(agent_state_instance.id)
@pytest.fixture(scope="module")
def agent_state_with_firecrawl_key(client: Letta) -> AgentState:
"""
Creates and returns an agent state for testing with a pre-configured agent.
"""
client.tools.upsert_base_tools()
send_message_tool = client.tools.list(name="send_message")[0]
run_code_tool = client.tools.list(name="run_code")[0]
web_search_tool = client.tools.list(name="web_search")[0]
firecrawl_search_tool = client.tools.list(name="firecrawl_search")[0]
agent_state_instance = client.agents.create(
name="test_builtin_tools_agent",
include_base_tools=False,
tool_ids=[send_message_tool.id, run_code_tool.id, web_search_tool.id, firecrawl_search_tool.id],
model="openai/gpt-4o",
embedding="letta/letta-free",
tags=["test_builtin_tools_agent"],
tool_exec_environment_variables={"FIRECRAWL_API_KEY": tool_settings.firecrawl_api_key},
)
yield agent_state_instance
# ------------------------------
@@ -200,3 +222,99 @@ def test_web_search(
returns = [m.tool_return for m in tool_returns]
expected = "RESULT 1:"
assert any(expected in ret for ret in returns), f"Expected to find '{expected}' in tool_return, " f"but got {returns!r}"
# Integration test: exercises the firecrawl_search builtin end-to-end against a
# live server (Firecrawl + OpenAI), using the system-settings API key path.
# NOTE(review): `llm_config` is parametrized but never applied to the agent in
# the body — confirm whether the agent's model is meant to vary per case.
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_firecrawl_search(
    client: Letta,
    agent_state: AgentState,
    llm_config: LLMConfig,
) -> None:
    """End-to-end check that the agent invokes firecrawl_search and returns the expected JSON structure."""
    user_message = MessageCreate(
        role="user",
        content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.",
        otid=USER_MESSAGE_OTID,
    )

    response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[user_message],
    )

    # Collect the tool-return messages emitted by the agent run.
    tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
    assert tool_returns, "No ToolReturnMessage found"

    returns = [m.tool_return for m in tool_returns]
    print(returns)

    # Parse the JSON response from firecrawl_search
    assert len(returns) > 0, "No tool returns found"
    response_json = json.loads(returns[0])

    # Basic structure assertions: mirror the schema built by _build_final_response.
    assert "query" in response_json, "Missing 'query' field in response"
    assert "question" in response_json, "Missing 'question' field in response"
    assert "total_sources" in response_json, "Missing 'total_sources' field in response"
    assert "total_citations" in response_json, "Missing 'total_citations' field in response"
    assert "sources" in response_json, "Missing 'sources' field in response"
    assert "api_key_source" in response_json, "Missing 'api_key_source' field in response"
    # This fixture's agent has no FIRECRAWL_API_KEY env var, so the key must
    # come from system settings.
    assert response_json["api_key_source"] == "system_settings"

    # Content assertions
    assert response_json["total_sources"] > 0, "Should have found at least one source"
    assert response_json["total_citations"] > 0, "Should have found at least one citation"
    assert len(response_json["sources"]) == response_json["total_sources"], "Sources count mismatch"

    # Verify we found information about Charles Packer's education
    found_education_info = False
    for source in response_json["sources"]:
        assert "url" in source, "Source missing URL"
        assert "title" in source, "Source missing title"
        assert "citations" in source, "Source missing citations"

        for citation in source["citations"]:
            assert "text" in citation, "Citation missing text"
            assert "thinking" in citation, "Citation missing thinking"

            # Check if we found education-related information
            if any(keyword in citation["text"].lower() for keyword in ["berkeley", "phd", "ph.d", "university", "student"]):
                found_education_info = True

    assert found_education_info, "Should have found education-related information about Charles Packer"

    # API key source should be valid
    assert response_json["api_key_source"] in [
        "agent_environment",
        "system_settings",
    ], f"Invalid api_key_source: {response_json['api_key_source']}"
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_firecrawl_search_using_agent_state_env_var(
client: Letta,
agent_state_with_firecrawl_key: AgentState,
llm_config: LLMConfig,
) -> None:
user_message = MessageCreate(
role="user",
content="I am executing a test. Use the firecrawl search tool to find where I, Charles Packer, the CEO of Letta, went to school.",
otid=USER_MESSAGE_OTID,
)
response = client.agents.messages.create(
agent_id=agent_state_with_firecrawl_key.id,
messages=[user_message],
)
tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
assert tool_returns, "No ToolReturnMessage found"
returns = [m.tool_return for m in tool_returns]
print(returns)
# Parse the JSON response from firecrawl_search
assert len(returns) > 0, "No tool returns found"
response_json = json.loads(returns[0])
# Basic structure assertions
assert response_json["api_key_source"] == "agent_environment"