diff --git a/letta/__init__.py b/letta/__init__.py index 46390abe..e67194c6 100644 --- a/letta/__init__.py +++ b/letta/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.6.6" +__version__ = "0.6.7" # import clients from letta.client.client import LocalClient, RESTClient, create_client diff --git a/letta/agent.py b/letta/agent.py index f61ace90..483d3cb8 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -224,8 +224,8 @@ class Agent(BaseAgent): ) function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state assert orig_memory_str == self.agent_state.memory.compile(), "Memory should not be modified in a sandbox tool" - - self.update_memory_if_change(updated_agent_state.memory) + if updated_agent_state is not None: + self.update_memory_if_change(updated_agent_state.memory) except Exception as e: # Need to catch error here, or else trunction wont happen # TODO: modify to function execution error @@ -238,7 +238,7 @@ class Agent(BaseAgent): def _get_ai_reply( self, message_sequence: List[Message], - function_call: str = "auto", + function_call: Optional[str] = None, first_message: bool = False, stream: bool = False, # TODO move to config? empty_response_retry_limit: int = 3, @@ -1029,6 +1029,7 @@ class Agent(BaseAgent): num_archival_memory=agent_manager_passage_size, num_recall_memory=message_manager_size, num_tokens_external_memory_summary=num_tokens_external_memory_summary, + external_memory_summary=external_memory_summary, # top-level information context_window_size_max=self.agent_state.llm_config.context_window, context_window_size_current=num_tokens_used_total, diff --git a/letta/cli/cli_config.py b/letta/cli/cli_config.py index 8278d553..87e43567 100644 --- a/letta/cli/cli_config.py +++ b/letta/cli/cli_config.py @@ -60,7 +60,6 @@ def list(arg: Annotated[ListChoice, typer.Argument]): table.field_names = ["Name", "Text"] for human in client.list_humans(): table.add_row([human.template_name, human.value.replace("\n", "")[:100]]) - print(table) elif arg == ListChoice.personas: """List all personas""" table.field_names = ["Name", "Text"] diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index 1244b6ff..7c99bbcd 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -250,6 +250,8 @@ def unpack_all_inner_thoughts_from_kwargs( def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice: message = choice.message + rewritten_choice = choice # inner thoughts unpacked out of the function + if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1: if len(message.tool_calls) > 1: warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported") @@ -271,14 +273,18 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) - warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})") new_choice.message.content = inner_thoughts - return new_choice + # update the choice object + rewritten_choice = new_choice else: warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}") - return choice except json.JSONDecodeError as e: warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}") raise e + else: + warnings.warn(f"Did not find tool call in message: {str(message)}") + + return rewritten_choice def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool: diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 030d7375..d83e8699 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -94,7 +94,7 @@ def create( user_id: Optional[str] = None, # option UUID to associate request with functions: Optional[list] = None, functions_python: Optional[dict] = None, - function_call: str = "auto", + function_call: Optional[str] = None, # see: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice # hint first_message: bool = False, force_tool_call: Optional[str] = None, # Force a specific tool to be called @@ -132,10 +132,19 @@ def create( # openai if llm_config.model_endpoint_type == "openai": + if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1": # only is a problem if we are *not* using an openai proxy raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"]) + if function_call is None and functions is not None and len(functions) > 0: + # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # TODO(matt) move into LLMConfig + if llm_config.model_endpoint == "https://inference.memgpt.ai": + function_call = "auto" # TODO change to "required" once proxy supports it + else: + function_call = "required" + data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens) if stream: # Client requested token streaming data.stream = True diff --git a/letta/schemas/letta_response.py b/letta/schemas/letta_response.py index d7019280..fc969d66 100644 --- a/letta/schemas/letta_response.py +++ b/letta/schemas/letta_response.py @@ -66,7 +66,7 @@ class LettaResponse(BaseModel): return f'