diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index ee4e7954..ca0c25f2 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -5,7 +5,7 @@ import requests
 from openai import OpenAI
 
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
@@ -96,10 +96,15 @@ def build_openai_chat_completions_request(
     max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
     if functions and llm_config.put_inner_thoughts_in_kwargs:
+        # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+        # TODO(fix): replace this ":1234" port-sniffing heuristic with an explicit LM Studio provider/endpoint-type check
+        inner_thoughts_desc = (
+            INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+        )
         functions = add_inner_thoughts_to_functions(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
-            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            inner_thoughts_description=inner_thoughts_desc,
         )
 
     openai_message_list = [
diff --git a/letta/local_llm/constants.py b/letta/local_llm/constants.py
index 03abcc81..f4c66a47 100644
--- a/letta/local_llm/constants.py
+++ b/letta/local_llm/constants.py
@@ -27,6 +27,7 @@ DEFAULT_WRAPPER_NAME = "chatml"
 
 INNER_THOUGHTS_KWARG = "inner_thoughts"
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
+INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
 ASSISTANT_MESSAGE_CLI_SYMBOL = "🤖"
diff --git a/letta/schemas/message.py b/letta/schemas/message.py
index 4377581a..b865671d 100644
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1,6 +1,7 @@
 import copy
 import json
 import warnings
+from collections import OrderedDict
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Literal, Optional, Union
@@ -33,18 +34,18 @@ def add_inner_thoughts_to_tool_call(
     inner_thoughts_key: str,
 ) -> OpenAIToolCall:
     """Add inner thoughts (arg + value) to a tool call"""
-    # because the kwargs are stored as strings, we need to load then write the JSON dicts
     try:
         # load the args list
         func_args = json.loads(tool_call.function.arguments)
-        # add the inner thoughts to the args list
-        func_args[inner_thoughts_key] = inner_thoughts
+        # create new ordered dict with inner thoughts first
+        ordered_args = OrderedDict({inner_thoughts_key: inner_thoughts})
+        # update with remaining args
+        ordered_args.update(func_args)
         # create the updated tool call (as a string)
         updated_tool_call = copy.deepcopy(tool_call)
-        updated_tool_call.function.arguments = json_dumps(func_args)
+        updated_tool_call.function.arguments = json_dumps(ordered_args)
         return updated_tool_call
     except json.JSONDecodeError as e:
-        # TODO: change to logging
         warnings.warn(f"Failed to put inner thoughts in kwargs: {e}")
         raise e
 
diff --git a/letta/server/server.py b/letta/server/server.py
index 1ef4c407..c9780fdd 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -397,11 +397,16 @@ class SyncServer(Server):
             )
         # Attempt to enable LM Studio by default
         if model_settings.lmstudio_base_url:
-            self._enabled_providers.append(
-                LMStudioOpenAIProvider(
-                    base_url=model_settings.lmstudio_base_url,
-                )
+            # Auto-append v1 to the base URL
+            lmstudio_url = (
+                model_settings.lmstudio_base_url
+                if model_settings.lmstudio_base_url.endswith("/v1")
+                else model_settings.lmstudio_base_url + "/v1"
             )
+            # Set the OpenAI API key to something non-empty
+            if model_settings.openai_api_key is None:
+                model_settings.openai_api_key = "DUMMY"
+            self._enabled_providers.append(LMStudioOpenAIProvider(base_url=lmstudio_url))
 
     def load_agent(self, agent_id: str, actor: User, interface: Union[AgentInterface, None] = None) -> Agent:
         """Updated method to load agents from persisted storage"""