fix: Fix summarizer for Anthropic and add integration tests (#2046)

This commit is contained in:
Matthew Zhou
2024-11-15 16:46:12 -08:00
committed by GitHub
parent cf35b9c4cd
commit 2d26365e42
16 changed files with 218 additions and 68 deletions

75
.github/workflows/integration_tests.yml vendored Normal file
View File

@@ -0,0 +1,75 @@
# Runs the provider-backed integration test suites against live LLM endpoints,
# with Postgres (pgvector) and Qdrant available as service containers.
name: Integration Tests

# Provider credentials consumed by the test suites.
# NOTE: GitHub does not pass repository secrets to pull_request runs that
# originate from forks, so these values are empty in that case.
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
  AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  run-integration-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    # Connection settings for the postgres service below; shared by the
    # migration step and the test step instead of being repeated in each.
    env:
      LETTA_PG_PORT: "5432"
      LETTA_PG_USER: postgres
      LETTA_PG_PASSWORD: postgres
      LETTA_PG_DB: postgres
      LETTA_PG_HOST: localhost
    strategy:
      # Let every suite in the matrix finish even if another one fails.
      fail-fast: false
      matrix:
        integration_test_suite:
          - "integration_test_summarizer.py"
    services:
      qdrant:
        image: qdrant/qdrant
        ports:
          - "6333:6333"
      postgres:
        image: pgvector/pgvector:pg17
        ports:
          - "5432:5432"
        env:
          # Trust auth: no password is required for connections to this
          # throwaway CI database.
          POSTGRES_HOST_AUTH_METHOD: trust
          POSTGRES_DB: postgres
          POSTGRES_USER: postgres
        # Hold the job until Postgres reports ready.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # NOTE(review): this third-party action is pinned to the mutable `main`
      # branch; consider pinning to a release tag or commit SHA.
      - name: Setup Python, Poetry, and Dependencies
        uses: packetcoders/action-setup-cache-python-poetry@main
        with:
          python-version: "3.12"
          poetry-version: "1.8.2"
          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"

      - name: Migrate database
        # IF NOT EXISTS keeps this step from failing when the extension is
        # already present in the target database.
        run: |
          psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION IF NOT EXISTS vector'
          poetry run alembic upgrade head

      - name: Run integration tests
        env:
          LETTA_SERVER_PASS: test_server_token
        run: |
          poetry run pytest -s -vv tests/${{ matrix.integration_test_suite }}

View File

@@ -29,7 +29,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_valid_first_message
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_returns_valid_first_message
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -38,7 +38,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_keyword
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_returns_keyword
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -47,7 +47,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_uses_external_tool
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_uses_external_tool
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -56,7 +56,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_recall_chat_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_recall_chat_memory
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -65,7 +65,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_archival_memory_retrieval
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_archival_memory_retrieval
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -74,7 +74,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_edit_core_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_haiku_3_5_edit_core_memory
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true

View File

@@ -31,7 +31,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_valid_first_message
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -41,7 +41,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_keyword
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -51,7 +51,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_uses_external_tool
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -61,7 +61,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_recall_chat_memory
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -71,7 +71,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_archival_memory_retrieval
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -81,7 +81,7 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_edit_core_memory
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true

View File

@@ -29,7 +29,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_valid_first_message
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -38,7 +38,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_keyword
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -47,7 +47,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_uses_external_tool
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -56,7 +56,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_recall_chat_memory
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -65,7 +65,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_archival_memory_retrieval
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true
@@ -74,7 +74,7 @@ jobs:
env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_edit_core_memory
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
continue-on-error: true

View File

@@ -23,9 +23,9 @@ jobs:
- name: Test LLM endpoint
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_letta_hosted
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_letta_hosted
continue-on-error: true
- name: Test embedding endpoint
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_letta_hosted
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_letta_hosted

View File

@@ -34,11 +34,11 @@ jobs:
- name: Test LLM endpoint
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_ollama
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_ollama
- name: Test embedding endpoint
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_ollama
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_ollama
- name: Test provider
run: |

View File

@@ -29,53 +29,53 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_valid_first_message
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_valid_first_message
- name: Test model sends message with keyword
id: test_keyword_message
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_keyword
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_keyword
- name: Test model uses external tool correctly
id: test_external_tool
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_uses_external_tool
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_uses_external_tool
- name: Test model recalls chat memory
id: test_chat_memory
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_recall_chat_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_recall_chat_memory
- name: Test model uses 'archival_memory_search' to find secret
id: test_archival_memory_search
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_retrieval
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_retrieval
- name: Test model uses 'archival_memory_insert' to insert archival memories
id: test_archival_memory_insert
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_insert
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_insert
- name: Test model can edit core memories
id: test_core_memory
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_edit_core_memory
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_edit_core_memory
- name: Test embedding endpoint
id: test_embedding_endpoint
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_openai

View File

@@ -131,4 +131,4 @@ jobs:
LETTA_SERVER_PASS: test_server_token
PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }}
run: |
poetry run pytest -s -vv -k "not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_endpoints and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests
poetry run pytest -s -vv -k "not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_perfomance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests

5
.gitignore vendored
View File

@@ -1018,3 +1018,8 @@ pgdata/
letta/.pytest_cache/
memgpy/pytest.ini
**/**/pytest_cache
# local sandbox venvs
letta/services/tool_sandbox_env/*
tests/test_tool_sandbox/*

View File

@@ -9,7 +9,7 @@ from tests.helpers.endpoints_helper import (
setup_agent,
)
from tests.helpers.utils import cleanup
from tests.test_endpoints import llm_config_dir
from tests.test_model_letta_perfomance import llm_config_dir
"""
This example shows how you can constrain tool calls in your agent.

View File

@@ -48,6 +48,7 @@ from letta.schemas.tool_rule import TerminalToolRule
from letta.schemas.usage import LettaUsageStatistics
from letta.services.source_manager import SourceManager
from letta.services.user_manager import UserManager
from letta.streaming_interface import StreamingRefreshCLIInterface
from letta.system import (
get_heartbeat,
get_initial_boot_messages,
@@ -229,7 +230,7 @@ class BaseAgent(ABC):
class Agent(BaseAgent):
def __init__(
self,
interface: Optional[AgentInterface],
interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
# agents can be created from providing agent_state
agent_state: AgentState,
tools: List[Tool],

View File

@@ -242,26 +242,28 @@ def convert_anthropic_response_to_chatcompletion(
finish_reason = remap_finish_reason(response_json["stop_reason"])
if isinstance(response_json["content"], list):
# inner mono + function call
# TODO relax asserts
assert len(response_json["content"]) == 2, response_json
assert response_json["content"][0]["type"] == "text", response_json
assert response_json["content"][1]["type"] == "tool_use", response_json
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
tool_calls = [
ToolCall(
id=response_json["content"][1]["id"],
type="function",
function=FunctionCall(
name=response_json["content"][1]["name"],
arguments=json.dumps(response_json["content"][1]["input"], indent=2),
),
)
]
if len(response_json["content"]) > 1:
# inner mono + function call
assert len(response_json["content"]) == 2, response_json
assert response_json["content"][0]["type"] == "text", response_json
assert response_json["content"][1]["type"] == "tool_use", response_json
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
tool_calls = [
ToolCall(
id=response_json["content"][1]["id"],
type="function",
function=FunctionCall(
name=response_json["content"][1]["name"],
arguments=json.dumps(response_json["content"][1]["input"], indent=2),
),
)
]
else:
# Just inner mono
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
tool_calls = None
else:
# just inner mono
content = strip_xml_tags(string=response_json["content"], tag=inner_thoughts_xml_tag)
tool_calls = None
raise RuntimeError("Unexpected type for content in response_json.")
assert response_json["role"] == "assistant", response_json
choice = Choice(

View File

@@ -1,6 +1,6 @@
{
"context_window": 200000,
"model": "claude-3-opus-20240229",
"model": "claude-3-5-haiku-20241022",
"model_endpoint_type": "anthropic",
"model_endpoint": "https://api.anthropic.com/v1",
"model_wrapper": null,

View File

@@ -0,0 +1,68 @@
import json
import os
import uuid
import pytest
from letta import create_client
from letta.agent import Agent
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.streaming_interface import StreamingRefreshCLIInterface
from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH
from tests.helpers.utils import cleanup
# constants
# Relative path to the directory of per-model LLM config JSON files; it is
# joined with each parametrized filename and opened relative to the current
# working directory.
LLM_CONFIG_DIR = "tests/configs/llm_model_configs"
# Substring the test expects to find in the agent's message at index 1 after
# summarization has been invoked.
SUMMARY_KEY_PHRASE = "The following is a summary"
@pytest.mark.parametrize(
    "config_filename",
    [
        "openai-gpt-4o.json",
        "azure-gpt-4o-mini.json",
        "claude-3-5-haiku.json",
        # "groq.json", TODO: Support groq, rate limiting currently makes it impossible to test
        # "gemini-pro.json", TODO: Gemini is broken
    ],
)
def test_summarizer(config_filename):
    """Check that in-place summarization produces a summary message for a given LLM config.

    For the parametrized config file, the test creates an agent, sends it a
    short two-message conversation, calls ``summarize_messages_inplace`` and
    asserts that ``SUMMARY_KEY_PHRASE`` appears in the content of the agent's
    message at index 1.

    Args:
        config_filename: Name of a JSON file inside ``LLM_CONFIG_DIR`` whose
            contents are passed as keyword arguments to ``LLMConfig``.
    """
    # uuid5 is name-based, so the agent name is stable across runs for a given config.
    namespace = uuid.NAMESPACE_DNS
    agent_name = str(uuid.uuid5(namespace, f"integration-test-summarizer-{config_filename}"))

    # Get the LLM config (context manager so the file handle is closed promptly)
    filename = os.path.join(LLM_CONFIG_DIR, config_filename)
    with open(filename, "r") as config_file:
        config_data = json.load(config_file)

    # Create client and clean up agents
    llm_config = LLMConfig(**config_data)
    with open(EMBEDDING_CONFIG_PATH) as embedding_file:
        embedding_config = EmbeddingConfig(**json.load(embedding_file))
    client = create_client()
    client.set_default_llm_config(llm_config)
    client.set_default_embedding_config(embedding_config)
    # Clean up any agent already registered under this name (see tests.helpers.utils.cleanup).
    cleanup(client=client, agent_uuid=agent_name)

    # Create agent
    agent_state = client.create_agent(name=agent_name, llm_config=llm_config, embedding_config=embedding_config)
    tools = [client.get_tool(client.get_tool_id(name=tool_name)) for tool_name in agent_state.tools]
    letta_agent = Agent(interface=StreamingRefreshCLIInterface(), agent_state=agent_state, tools=tools, first_message_verify_mono=False)

    # Make conversation
    messages = [
        "Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible.",
        "Octopuses have three hearts, and two of them stop beating when they swim.",
    ]
    for m in messages:
        letta_agent.step_user_message(
            user_message_str=m,
            first_message=False,
            skip_verify=False,
            stream=False,
            ms=client.server.ms,
        )

    # Invoke a summarize
    letta_agent.summarize_messages_inplace(preserve_last_N_messages=False)
    assert SUMMARY_KEY_PHRASE in letta_agent.messages[1]["content"], f"Test failed for config: {config_filename}"

View File

@@ -1,4 +1,3 @@
import os
import uuid
import pytest
@@ -13,12 +12,11 @@ from tests.helpers.endpoints_helper import (
setup_agent,
)
from tests.helpers.utils import cleanup
from tests.test_endpoints import llm_config_dir
# Generate uuid for agent name for this example
namespace = uuid.NAMESPACE_DNS
agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
config_file = os.path.join(llm_config_dir, "openai-gpt-4o.json")
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"
"""Contrived tools for this test case"""

View File

@@ -59,6 +59,7 @@ def retry_until_threshold(threshold=0.5, max_attempts=10, sleep_time_seconds=4):
# ======================================================================================================================
# OPENAI TESTS
# ======================================================================================================================
@retry_until_threshold(threshold=0.75, max_attempts=4)
def test_openai_gpt_4o_returns_valid_first_message():
filename = os.path.join(llm_config_dir, "openai-gpt-4o.json")
response = check_first_response_is_valid_for_llm_endpoint(filename)
@@ -205,44 +206,44 @@ def test_embedding_endpoint_ollama():
# ======================================================================================================================
# ANTHROPIC TESTS
# ======================================================================================================================
def test_claude_opus_3_returns_valid_first_message():
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
def test_claude_haiku_3_5_returns_valid_first_message():
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_first_response_is_valid_for_llm_endpoint(filename)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")
def test_claude_opus_3_returns_keyword():
def test_claude_haiku_3_5_returns_keyword():
keyword = "banana"
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_response_contains_keyword(filename, keyword=keyword)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")
def test_claude_opus_3_uses_external_tool():
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
def test_claude_haiku_3_5_uses_external_tool():
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_agent_uses_external_tool(filename)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")
def test_claude_opus_3_recall_chat_memory():
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
def test_claude_haiku_3_5_recall_chat_memory():
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_agent_recall_chat_memory(filename)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")
def test_claude_opus_3_archival_memory_retrieval():
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
def test_claude_haiku_3_5_archival_memory_retrieval():
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_agent_archival_memory_retrieval(filename)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")
def test_claude_opus_3_edit_core_memory():
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
def test_claude_haiku_3_5_edit_core_memory():
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
response = check_agent_edit_core_memory(filename)
# Log out successful response
print(f"Got successful response from client: \n\n{response}")