letta-server/examples/google_search.py

import os
import random
import time
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional, Tuple

import serpapi
from openai import OpenAI

from memgpt.credentials import MemGPTCredentials
from memgpt.data_sources.connectors import WebConnector
from memgpt.data_types import Document, Passage
from memgpt.utils import printd


"""
This example shows how you can add a Google search custom function to your MemGPT agent.

1. Copy this file into the `~/.memgpt/functions` directory:
```
cp examples/google_search.py  ~/.memgpt/functions/google_search.py
```

2. Create a preset file that includes the function `google_search`

3. Add the preset file via the CLI:
```
memgpt add preset -f ~/.memgpt/presets/search_preset.yaml --name search_preset
```
"""


def google_search(self, query: str) -> List[Tuple[str, str]]:
    """
    A tool to search google with the provided query, and return a list of relevant summaries and URLs.

    Args:
        query (str): The search query.

    Returns:
        List[Tuple[str, str]]: A list of up to 5 tuples, each containing a summary of the search result and the URL of the search result in the form (summary, URL)

    Example:
        >>> google_search("How can I make a french 75?")
        [
            (
                "To make a French 75 cocktail, combine 1½ oz. gin, ¾ oz. fresh lemon juice, and ¾ oz. simple syrup in a cocktail shaker with ice. Shake vigorously, then strain into a large flute. Top with 2 oz. Champagne and garnish with a long spiral lemon twist. The recipe prefers gin, but cognac is also traditional. Serve in Champagne flutes for the full effect.",
                "https://www.bonappetit.com/recipe/french-75-3"
            )
        ]
    """
    # NOTE: the helpers below are intentionally nested rather than module-level,
    # so that this module exposes `google_search` as its only function.

    max_links = 5
    # Sentinel phrase the model is asked to emit when a page is irrelevant.
    no_info_marker = "No relevant information found."

    printd("Starting google search:", query)

    def summarize_text(document_text: str, question: str) -> Optional[str]:
        """Summarize `document_text` with respect to `question` via the OpenAI chat API.

        Returns None when the model reports no relevant information or
        returns no content at all.
        """
        # TODO: make request to GPT-4 turbo API for conditional summarization
        prompt = (
            f'Given the question "{question}", summarize the text below. '
            f'If there is no relevant information, say "{no_info_marker}"'
            f"\n\n{document_text}"
        )

        credentials = MemGPTCredentials().load()
        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        if credentials.openai_key is None:
            raise ValueError("OpenAI API key is not set in the MemGPT credentials")
        # model = "gpt-4-1106-preview"
        model = "gpt-3.5-turbo-1106"

        client = OpenAI(api_key=credentials.openai_key)
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt},
            ],
            model=model,
        )
        response = chat_completion.choices[0].message.content
        # The message content may be None/empty; treat that the same as "nothing found".
        if not response or no_info_marker in response:
            return None
        return response

    def read_and_summarize_link(link: str) -> Optional[Tuple[str, str]]:
        """Fetch `link` and return (summary, url) for the first relevant document.

        Any failure is logged and reported as None, so that one bad link does
        not discard the results obtained from the other links.
        """
        try:
            connector = WebConnector([link])
            started = time.time()
            for document_text, document_metadata in connector.generate_documents():
                printd(f"Time taken to retrieve text data: {time.time() - started}")
                # summarize text data
                started = time.time()
                summary = summarize_text(document_text, query)
                printd(f"Time taken to summarize text data: {time.time() - started}, length: {len(document_text)}")
                printd(link)
                if summary is not None:
                    return (summary, document_metadata["url"])
        except Exception as e:
            printd(f"An error occurred with retrieving text data from {link}: {e}")
        return None

    params = {
        "engine": "google",
        "q": query,
    }

    # get links from web search
    try:
        started = time.time()
        search = serpapi.Client(api_key=os.environ["SERPAPI_API_KEY"]).search(params)
        printd(f"Time taken to retrieve search results: {time.time() - started}")
        candidates = (result.get("link") for result in search["organic_results"])
        # Drop results without a link *before* truncating, so a missing link
        # neither reaches the connector nor uses up one of the slots.
        links = [link for link in candidates if link][:max_links]
    except Exception as e:
        printd(f"An error occurred with retrieving results: {e}")
        return []

    if not links:
        return []

    # retrieve text data from links (in parallel; `map` keeps search-rank order)
    try:
        started = time.time()
        with ThreadPoolExecutor(max_workers=16) as executor:
            outcomes = list(executor.map(read_and_summarize_link, links))
        printd(f"Time taken: {time.time() - started}")
    except Exception as e:
        printd(f"An error occurred with retrieving text data: {e}")
        return []

    return [outcome for outcome in outcomes if outcome is not None]