fix: Fix summarizer for Anthropic and add integration tests (#2046)
This commit is contained in:
75
.github/workflows/integration_tests.yml
vendored
Normal file
75
.github/workflows/integration_tests.yml
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
# Workflow that runs the provider-backed integration test suites against a
# Postgres (pgvector) service container.
name: Integration Tests

# Provider credentials exposed to every step in the workflow.
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
  AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  run-integration-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      # Let every suite in the matrix finish even if one of them fails.
      fail-fast: false
      matrix:
        integration_test_suite:
          - "integration_test_summarizer.py"
    services:
      qdrant:
        image: qdrant/qdrant
        ports:
          # Quoted so the host:container mapping is always read as a string.
          - "6333:6333"
      postgres:
        image: pgvector/pgvector:pg17
        ports:
          - "5432:5432"
        env:
          POSTGRES_HOST_AUTH_METHOD: trust
          POSTGRES_DB: postgres
          POSTGRES_USER: postgres
        # Container health check: poll pg_isready every 10s, up to 5 times.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python, Poetry, and Dependencies
        uses: packetcoders/action-setup-cache-python-poetry@main
        with:
          python-version: "3.12"
          poetry-version: "1.8.2"
          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"

      - name: Migrate database
        env:
          LETTA_PG_PORT: "5432"
          LETTA_PG_USER: postgres
          LETTA_PG_PASSWORD: postgres
          LETTA_PG_DB: postgres
          LETTA_PG_HOST: localhost
        run: |
          # IF NOT EXISTS keeps this step from failing when the extension is already installed.
          psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION IF NOT EXISTS vector'
          poetry run alembic upgrade head

      - name: Run integration tests
        env:
          LETTA_PG_PORT: "5432"
          LETTA_PG_USER: postgres
          LETTA_PG_PASSWORD: postgres
          LETTA_PG_DB: postgres
          LETTA_PG_HOST: localhost
          LETTA_SERVER_PASS: test_server_token
        run: |
          poetry run pytest -s -vv tests/${{ matrix.integration_test_suite }}
|
||||
12
.github/workflows/test_anthropic.yml
vendored
12
.github/workflows/test_anthropic.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_valid_first_message
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_valid_first_message
|
||||
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_keyword
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_keyword
|
||||
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_uses_external_tool
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_uses_external_tool
|
||||
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_recall_chat_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_recall_chat_memory
|
||||
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_archival_memory_retrieval
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_archival_memory_retrieval
|
||||
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_edit_core_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_edit_core_memory
|
||||
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
|
||||
12
.github/workflows/test_azure.yml
vendored
12
.github/workflows/test_azure.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_valid_first_message
|
||||
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_keyword
|
||||
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_uses_external_tool
|
||||
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_recall_chat_memory
|
||||
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_archival_memory_retrieval
|
||||
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_edit_core_memory
|
||||
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
|
||||
12
.github/workflows/test_groq.yml
vendored
12
.github/workflows/test_groq.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_valid_first_message
|
||||
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_keyword
|
||||
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_uses_external_tool
|
||||
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_recall_chat_memory
|
||||
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_archival_memory_retrieval
|
||||
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
env:
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_edit_core_memory
|
||||
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
|
||||
4
.github/workflows/test_memgpt_hosted.yml
vendored
4
.github/workflows/test_memgpt_hosted.yml
vendored
@@ -23,9 +23,9 @@ jobs:
|
||||
|
||||
- name: Test LLM endpoint
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_letta_hosted
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_letta_hosted
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test embedding endpoint
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_letta_hosted
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_letta_hosted
|
||||
|
||||
4
.github/workflows/test_ollama.yml
vendored
4
.github/workflows/test_ollama.yml
vendored
@@ -34,11 +34,11 @@ jobs:
|
||||
|
||||
- name: Test LLM endpoint
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_ollama
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_ollama
|
||||
|
||||
- name: Test embedding endpoint
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_ollama
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_ollama
|
||||
|
||||
- name: Test provider
|
||||
run: |
|
||||
|
||||
16
.github/workflows/test_openai.yml
vendored
16
.github/workflows/test_openai.yml
vendored
@@ -29,53 +29,53 @@ jobs:
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_valid_first_message
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_valid_first_message
|
||||
|
||||
- name: Test model sends message with keyword
|
||||
id: test_keyword_message
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_keyword
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_keyword
|
||||
|
||||
- name: Test model uses external tool correctly
|
||||
id: test_external_tool
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_uses_external_tool
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_uses_external_tool
|
||||
|
||||
- name: Test model recalls chat memory
|
||||
id: test_chat_memory
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_recall_chat_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_recall_chat_memory
|
||||
|
||||
- name: Test model uses 'archival_memory_search' to find secret
|
||||
id: test_archival_memory_search
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_retrieval
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_retrieval
|
||||
|
||||
- name: Test model uses 'archival_memory_insert' to insert archival memories
|
||||
id: test_archival_memory_insert
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_insert
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_insert
|
||||
|
||||
- name: Test model can edit core memories
|
||||
id: test_core_memory
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_edit_core_memory
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_edit_core_memory
|
||||
|
||||
- name: Test embedding endpoint
|
||||
id: test_embedding_endpoint
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
|
||||
poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_openai
|
||||
|
||||
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@@ -131,4 +131,4 @@ jobs:
|
||||
LETTA_SERVER_PASS: test_server_token
|
||||
PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }}
|
||||
run: |
|
||||
poetry run pytest -s -vv -k "not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_endpoints and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests
|
||||
poetry run pytest -s -vv -k "not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_perfomance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1018,3 +1018,8 @@ pgdata/
|
||||
letta/.pytest_cache/
|
||||
memgpy/pytest.ini
|
||||
**/**/pytest_cache
|
||||
|
||||
|
||||
# local sandbox venvs
|
||||
letta/services/tool_sandbox_env/*
|
||||
tests/test_tool_sandbox/*
|
||||
|
||||
@@ -9,7 +9,7 @@ from tests.helpers.endpoints_helper import (
|
||||
setup_agent,
|
||||
)
|
||||
from tests.helpers.utils import cleanup
|
||||
from tests.test_endpoints import llm_config_dir
|
||||
from tests.test_model_letta_perfomance import llm_config_dir
|
||||
|
||||
"""
|
||||
This example shows how you can constrain tool calls in your agent.
|
||||
|
||||
@@ -48,6 +48,7 @@ from letta.schemas.tool_rule import TerminalToolRule
|
||||
from letta.schemas.usage import LettaUsageStatistics
|
||||
from letta.services.source_manager import SourceManager
|
||||
from letta.services.user_manager import UserManager
|
||||
from letta.streaming_interface import StreamingRefreshCLIInterface
|
||||
from letta.system import (
|
||||
get_heartbeat,
|
||||
get_initial_boot_messages,
|
||||
@@ -229,7 +230,7 @@ class BaseAgent(ABC):
|
||||
class Agent(BaseAgent):
|
||||
def __init__(
|
||||
self,
|
||||
interface: Optional[AgentInterface],
|
||||
interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
|
||||
# agents can be created from providing agent_state
|
||||
agent_state: AgentState,
|
||||
tools: List[Tool],
|
||||
|
||||
@@ -242,26 +242,28 @@ def convert_anthropic_response_to_chatcompletion(
|
||||
finish_reason = remap_finish_reason(response_json["stop_reason"])
|
||||
|
||||
if isinstance(response_json["content"], list):
|
||||
# inner mono + function call
|
||||
# TODO relax asserts
|
||||
assert len(response_json["content"]) == 2, response_json
|
||||
assert response_json["content"][0]["type"] == "text", response_json
|
||||
assert response_json["content"][1]["type"] == "tool_use", response_json
|
||||
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=response_json["content"][1]["id"],
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=response_json["content"][1]["name"],
|
||||
arguments=json.dumps(response_json["content"][1]["input"], indent=2),
|
||||
),
|
||||
)
|
||||
]
|
||||
if len(response_json["content"]) > 1:
|
||||
# inner mono + function call
|
||||
assert len(response_json["content"]) == 2, response_json
|
||||
assert response_json["content"][0]["type"] == "text", response_json
|
||||
assert response_json["content"][1]["type"] == "tool_use", response_json
|
||||
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=response_json["content"][1]["id"],
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=response_json["content"][1]["name"],
|
||||
arguments=json.dumps(response_json["content"][1]["input"], indent=2),
|
||||
),
|
||||
)
|
||||
]
|
||||
else:
|
||||
# Just inner mono
|
||||
content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
|
||||
tool_calls = None
|
||||
else:
|
||||
# just inner mono
|
||||
content = strip_xml_tags(string=response_json["content"], tag=inner_thoughts_xml_tag)
|
||||
tool_calls = None
|
||||
raise RuntimeError("Unexpected type for content in response_json.")
|
||||
|
||||
assert response_json["role"] == "assistant", response_json
|
||||
choice = Choice(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"context_window": 200000,
|
||||
"model": "claude-3-opus-20240229",
|
||||
"model": "claude-3-5-haiku-20241022",
|
||||
"model_endpoint_type": "anthropic",
|
||||
"model_endpoint": "https://api.anthropic.com/v1",
|
||||
"model_wrapper": null,
|
||||
68
tests/integration_test_summarizer.py
Normal file
68
tests/integration_test_summarizer.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from letta import create_client
|
||||
from letta.agent import Agent
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.streaming_interface import StreamingRefreshCLIInterface
|
||||
from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH
|
||||
from tests.helpers.utils import cleanup
|
||||
|
||||
# constants
|
||||
LLM_CONFIG_DIR = "tests/configs/llm_model_configs"
|
||||
SUMMARY_KEY_PHRASE = "The following is a summary"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "config_filename",
    [
        "openai-gpt-4o.json",
        "azure-gpt-4o-mini.json",
        "claude-3-5-haiku.json",
        # "groq.json", TODO: Support groq, rate limiting currently makes it impossible to test
        # "gemini-pro.json", TODO: Gemini is broken
    ],
)
def test_summarizer(config_filename):
    """Check that forcing a summarization leaves a summary message in the agent's history.

    For the given LLM config file, the test creates an agent, sends it two user
    messages, calls ``summarize_messages_inplace`` and asserts that
    ``SUMMARY_KEY_PHRASE`` appears in the content of the message at index 1.

    Args:
        config_filename: Name of a JSON file inside ``LLM_CONFIG_DIR`` whose
            contents are passed as keyword arguments to ``LLMConfig``.
    """
    # Derive the agent name from the config filename so each parametrized case
    # uses its own, repeatable name.
    namespace = uuid.NAMESPACE_DNS
    agent_name = str(uuid.uuid5(namespace, f"integration-test-summarizer-{config_filename}"))

    # Get the LLM config (context managers make sure the file handles are closed)
    filename = os.path.join(LLM_CONFIG_DIR, config_filename)
    with open(filename, "r") as llm_config_file:
        config_data = json.load(llm_config_file)
    with open(EMBEDDING_CONFIG_PATH) as embedding_config_file:
        embedding_data = json.load(embedding_config_file)

    # Create client and clean up agents
    llm_config = LLMConfig(**config_data)
    embedding_config = EmbeddingConfig(**embedding_data)
    client = create_client()
    client.set_default_llm_config(llm_config)
    client.set_default_embedding_config(embedding_config)
    # NOTE(review): presumably removes a leftover agent with this name from an earlier run; confirm in tests.helpers.utils.
    cleanup(client=client, agent_uuid=agent_name)

    # Create agent
    agent_state = client.create_agent(name=agent_name, llm_config=llm_config, embedding_config=embedding_config)
    tools = [client.get_tool(client.get_tool_id(name=tool_name)) for tool_name in agent_state.tools]
    letta_agent = Agent(interface=StreamingRefreshCLIInterface(), agent_state=agent_state, tools=tools, first_message_verify_mono=False)

    # Make conversation
    messages = [
        "Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible.",
        "Octopuses have three hearts, and two of them stop beating when they swim.",
    ]

    for m in messages:
        letta_agent.step_user_message(
            user_message_str=m,
            first_message=False,
            skip_verify=False,
            stream=False,
            ms=client.server.ms,
        )

    # Invoke a summarize
    letta_agent.summarize_messages_inplace(preserve_last_N_messages=False)
    assert SUMMARY_KEY_PHRASE in letta_agent.messages[1]["content"], f"Test failed for config: {config_filename}"
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
@@ -13,12 +12,11 @@ from tests.helpers.endpoints_helper import (
|
||||
setup_agent,
|
||||
)
|
||||
from tests.helpers.utils import cleanup
|
||||
from tests.test_endpoints import llm_config_dir
|
||||
|
||||
# Generate uuid for agent name for this example
|
||||
namespace = uuid.NAMESPACE_DNS
|
||||
agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
|
||||
config_file = os.path.join(llm_config_dir, "openai-gpt-4o.json")
|
||||
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"
|
||||
|
||||
"""Contrived tools for this test case"""
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ def retry_until_threshold(threshold=0.5, max_attempts=10, sleep_time_seconds=4):
|
||||
# ======================================================================================================================
|
||||
# OPENAI TESTS
|
||||
# ======================================================================================================================
|
||||
@retry_until_threshold(threshold=0.75, max_attempts=4)
|
||||
def test_openai_gpt_4o_returns_valid_first_message():
|
||||
filename = os.path.join(llm_config_dir, "openai-gpt-4o.json")
|
||||
response = check_first_response_is_valid_for_llm_endpoint(filename)
|
||||
@@ -205,44 +206,44 @@ def test_embedding_endpoint_ollama():
|
||||
# ======================================================================================================================
|
||||
# ANTHROPIC TESTS
|
||||
# ======================================================================================================================
|
||||
def test_claude_opus_3_returns_valid_first_message():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
def test_claude_haiku_3_5_returns_valid_first_message():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_first_response_is_valid_for_llm_endpoint(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_claude_opus_3_returns_keyword():
|
||||
def test_claude_haiku_3_5_returns_keyword():
|
||||
keyword = "banana"
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_response_contains_keyword(filename, keyword=keyword)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_claude_opus_3_uses_external_tool():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
def test_claude_haiku_3_5_uses_external_tool():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_agent_uses_external_tool(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_claude_opus_3_recall_chat_memory():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
def test_claude_haiku_3_5_recall_chat_memory():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_agent_recall_chat_memory(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_claude_opus_3_archival_memory_retrieval():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
def test_claude_haiku_3_5_archival_memory_retrieval():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_agent_archival_memory_retrieval(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_claude_opus_3_edit_core_memory():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-opus.json")
|
||||
def test_claude_haiku_3_5_edit_core_memory():
|
||||
filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json")
|
||||
response = check_agent_edit_core_memory(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
Reference in New Issue
Block a user