diff --git a/letta/agent.py b/letta/agent.py index 0cbaff68..1096668e 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -237,8 +237,8 @@ class Agent(BaseAgent): ) function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state assert orig_memory_str == self.agent_state.memory.compile(), "Memory should not be modified in a sandbox tool" - - self.update_memory_if_change(updated_agent_state.memory) + if updated_agent_state is not None: + self.update_memory_if_change(updated_agent_state.memory) except Exception as e: # Need to catch error here, or else trunction wont happen # TODO: modify to function execution error @@ -251,7 +251,7 @@ class Agent(BaseAgent): def _get_ai_reply( self, message_sequence: List[Message], - function_call: str = "auto", + function_call: Optional[str] = None, first_message: bool = False, stream: bool = False, # TODO move to config? empty_response_retry_limit: int = 3, diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index 1244b6ff..7c99bbcd 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -250,6 +250,8 @@ def unpack_all_inner_thoughts_from_kwargs( def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice: message = choice.message + rewritten_choice = choice # inner thoughts unpacked out of the function + if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1: if len(message.tool_calls) > 1: warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported") @@ -271,14 +273,18 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) - warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})") new_choice.message.content = inner_thoughts - return new_choice + # update the choice object + rewritten_choice = new_choice else: warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}") - return choice except json.JSONDecodeError as e: warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}") raise e + else: + warnings.warn(f"Did not find tool call in message: {str(message)}") + + return rewritten_choice def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool: diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 578779d7..146c1209 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -110,7 +110,7 @@ def create( user_id: Optional[str] = None, # option UUID to associate request with functions: Optional[list] = None, functions_python: Optional[dict] = None, - function_call: str = "auto", + function_call: Optional[str] = None, # see: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice # hint first_message: bool = False, force_tool_call: Optional[str] = None, # Force a specific tool to be called @@ -148,10 +148,19 @@ def create( # openai if llm_config.model_endpoint_type == "openai": + if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1": # only is a problem if we are *not* using an openai proxy raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"]) + if function_call is None and functions is not None and len(functions) > 0: + # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # TODO(matt) move into LLMConfig + if llm_config.model_endpoint == "https://inference.memgpt.ai": + function_call = "auto" # TODO change to "required" once proxy supports it + else: + function_call = "required" + data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens) if stream: # Client requested token streaming data.stream = True @@ -255,12 +264,7 @@ def create( tool_call = None if force_tool_call is not None: - tool_call = { - "type": "function", - "function": { - "name": force_tool_call - } - } + tool_call = {"type": "function", "function": {"name": force_tool_call}} assert functions is not None return anthropic_chat_completions_request( diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index 4e6b80ec..8f23e42a 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -336,7 +336,7 @@ class AgentManager: curr_memory_str = agent_state.memory.compile() if curr_memory_str in curr_system_message_openai["content"] and not force: # NOTE: could this cause issues if a block is removed? (substring match would still work) - logger.info( + logger.debug( f"Memory hasn't changed for agent id={agent_id} and actor=({actor.id}, {actor.name}), skipping system prompt rebuild" ) return agent_state diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py index 87997aaf..eb55aaed 100644 --- a/tests/helpers/endpoints_helper.py +++ b/tests/helpers/endpoints_helper.py @@ -127,7 +127,11 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str) -> ChatComplet choice = response.choices[0] # Ensure that the first message returns a "send_message" - validator_func = lambda function_call: function_call.name == "send_message" or function_call.name == "archival_memory_search" + validator_func = ( + lambda function_call: function_call.name == "send_message" + or function_call.name == "archival_memory_search" + or function_call.name == "core_memory_append" + ) assert_contains_valid_function_call(choice.message, validator_func) # Assert that the message has an inner monologue diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index 44aad0d0..bec04077 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -2,6 +2,7 @@ import time import uuid import pytest + from letta import create_client from letta.schemas.letta_message import ToolCallMessage from letta.schemas.tool_rule import ( @@ -42,7 +43,7 @@ def second_secret_word(prev_secret_word: str): prev_secret_word (str): The secret word retrieved from calling first_secret_word. """ if prev_secret_word != "v0iq020i0g": - raise RuntimeError(f"Expected secret {"v0iq020i0g"}, got {prev_secret_word}") + raise RuntimeError(f"Expected secret {'v0iq020i0g'}, got {prev_secret_word}") return "4rwp2b4gxq" @@ -55,7 +56,7 @@ def third_secret_word(prev_secret_word: str): prev_secret_word (str): The secret word retrieved from calling second_secret_word. """ if prev_secret_word != "4rwp2b4gxq": - raise RuntimeError(f"Expected secret {"4rwp2b4gxq"}, got {prev_secret_word}") + raise RuntimeError(f'Expected secret "4rwp2b4gxq", got {prev_secret_word}') return "hj2hwibbqm" @@ -68,7 +69,7 @@ def fourth_secret_word(prev_secret_word: str): prev_secret_word (str): The secret word retrieved from calling third_secret_word. """ if prev_secret_word != "hj2hwibbqm": - raise RuntimeError(f"Expected secret {"hj2hwibbqm"}, got {prev_secret_word}") + raise RuntimeError(f"Expected secret {'hj2hwibbqm'}, got {prev_secret_word}") return "banana" @@ -194,16 +195,13 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none): "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json", "tests/configs/llm_model_configs/openai-gpt-4o.json", ] - + # Create two test tools t1_name = "first_secret_word" t2_name = "second_secret_word" t1 = client.create_or_update_tool(first_secret_word, name=t1_name) t2 = client.create_or_update_tool(second_secret_word, name=t2_name) - tool_rules = [ - InitToolRule(tool_name=t1_name), - InitToolRule(tool_name=t2_name) - ] + tool_rules = [InitToolRule(tool_name=t1_name), InitToolRule(tool_name=t2_name)] tools = [t1, t2] for config_file in config_files: @@ -212,34 +210,26 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none): if "gpt-4o" in config_file: # Structured output model (should work with multiple init tools) - agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, - tool_ids=[t.id for t in tools], - tool_rules=tool_rules) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) assert agent_state is not None else: # Non-structured output model (should raise error with multiple init tools) with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"): - setup_agent(client, config_file, agent_uuid=agent_uuid, - tool_ids=[t.id for t in tools], - tool_rules=tool_rules) - + setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) + # Cleanup cleanup(client=client, agent_uuid=agent_uuid) # Create tool rule with single initial tool t3_name = "third_secret_word" t3 = client.create_or_update_tool(third_secret_word, name=t3_name) - tool_rules = [ - InitToolRule(tool_name=t3_name) - ] + tool_rules = [InitToolRule(tool_name=t3_name)] tools = [t3] for config_file in config_files: agent_uuid = str(uuid.uuid4()) # Structured output model (should work with single init tool) - agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, - tool_ids=[t.id for t in tools], - tool_rules=tool_rules) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) assert agent_state is not None cleanup(client=client, agent_uuid=agent_uuid) @@ -257,7 +247,7 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): tool_rules = [ InitToolRule(tool_name=t1_name), ChildToolRule(tool_name=t1_name, children=[t2_name]), - TerminalToolRule(tool_name=t2_name) + TerminalToolRule(tool_name=t2_name), ] tools = [t1, t2] @@ -265,7 +255,9 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json" for i in range(3): agent_uuid = str(uuid.uuid4()) - agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) + agent_state = setup_agent( + client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules + ) response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?") assert_sanity_checks(response) @@ -289,9 +281,10 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): # Implement exponential backoff with initial time of 10 seconds if i < 2: - backoff_time = 10 * (2 ** i) + backoff_time = 10 * (2**i) time.sleep(backoff_time) + @pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): client = create_client() @@ -389,7 +382,7 @@ def test_agent_conditional_tool_easy(mock_e2b_api_key_none): default_child=coin_flip_name, child_output_mapping={ "hj2hwibbqm": secret_word_tool, - } + }, ), TerminalToolRule(tool_name=secret_word_tool), ] @@ -425,7 +418,6 @@ def test_agent_conditional_tool_easy(mock_e2b_api_key_none): cleanup(client=client, agent_uuid=agent_uuid) - @pytest.mark.timeout(90) # Longer timeout since this test has more steps def test_agent_conditional_tool_hard(mock_e2b_api_key_none): """ @@ -450,7 +442,7 @@ def test_agent_conditional_tool_hard(mock_e2b_api_key_none): final_tool = "fourth_secret_word" play_game_tool = client.create_or_update_tool(can_play_game, name=play_game) flip_coin_tool = client.create_or_update_tool(flip_coin_hard, name=coin_flip_name) - reveal_secret = client.create_or_update_tool(fourth_secret_word, name=final_tool) + reveal_secret = client.create_or_update_tool(fourth_secret_word, name=final_tool) # Make tool rules - chain them together with conditional rules tool_rules = [ @@ -458,16 +450,10 @@ def test_agent_conditional_tool_hard(mock_e2b_api_key_none): ConditionalToolRule( tool_name=play_game, default_child=play_game, # Keep trying if we can't play - child_output_mapping={ - True: coin_flip_name # Only allow access when can_play_game returns True - } + child_output_mapping={True: coin_flip_name}, # Only allow access when can_play_game returns True ), ConditionalToolRule( - tool_name=coin_flip_name, - default_child=coin_flip_name, - child_output_mapping={ - "hj2hwibbqm": final_tool, "START_OVER": play_game - } + tool_name=coin_flip_name, default_child=coin_flip_name, child_output_mapping={"hj2hwibbqm": final_tool, "START_OVER": play_game} ), TerminalToolRule(tool_name=final_tool), ] @@ -475,13 +461,7 @@ def test_agent_conditional_tool_hard(mock_e2b_api_key_none): # Setup agent with all tools tools = [play_game_tool, flip_coin_tool, reveal_secret] config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json" - agent_state = setup_agent( - client, - config_file, - agent_uuid=agent_uuid, - tool_ids=[t.id for t in tools], - tool_rules=tool_rules - ) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) # Ask agent to try to get all secret words response = client.user_message(agent_id=agent_state.id, message="hi") @@ -520,7 +500,7 @@ def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none): Test the agent with a conditional tool that allows any child tool to be called if a function returns None. Tool Flow: - + return_none | v @@ -541,27 +521,16 @@ def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none): ConditionalToolRule( tool_name=tool_name, default_child=None, # Allow any tool to be called if output doesn't match - child_output_mapping={ - "anything but none": "first_secret_word" - } - ) + child_output_mapping={"anything but none": "first_secret_word"}, + ), ] tools = [tool, secret_word] # Setup agent with all tools - agent_state = setup_agent( - client, - config_file, - agent_uuid=agent_uuid, - tool_ids=[t.id for t in tools], - tool_rules=tool_rules - ) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) # Ask agent to try different tools based on the game output - response = client.user_message( - agent_id=agent_state.id, - message="call a function, any function. then call send_message" - ) + response = client.user_message(agent_id=agent_state.id, message="call a function, any function. then call send_message") # Make checks assert_sanity_checks(response) @@ -613,18 +582,14 @@ def test_agent_reload_remembers_function_response(mock_e2b_api_key_none): ConditionalToolRule( tool_name=flip_coin_name, default_child=flip_coin_name, # Allow any tool to be called if output doesn't match - child_output_mapping={ - "hj2hwibbqm": secret_word - } + child_output_mapping={"hj2hwibbqm": secret_word}, ), - TerminalToolRule(tool_name=secret_word) + TerminalToolRule(tool_name=secret_word), ] tools = [flip_coin_tool, secret_word_tool] # Setup initial agent - agent_state = setup_agent( - client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules - ) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) # Call flip_coin first response = client.user_message(agent_id=agent_state.id, message="flip a coin") @@ -643,4 +608,4 @@ def test_agent_reload_remembers_function_response(mock_e2b_api_key_none): assert reloaded_agent.last_function_response is not None print(f"Got successful response from client: \n\n{response}") - cleanup(client=client, agent_uuid=agent_uuid) \ No newline at end of file + cleanup(client=client, agent_uuid=agent_uuid)