From 12c2b494616ff20b2767c5e28285bb8bfe37de7f Mon Sep 17 00:00:00 2001
From: Charles Packer
Date: Sun, 6 Jul 2025 11:05:31 -0700
Subject: [PATCH] fix: add frequency penalty for gpt-4o-mini (#3166)

---
 letta/llm_api/openai_client.py |  4 ++++
 letta/schemas/llm_config.py    |  4 ++++
 letta/schemas/providers.py     | 27 +++++++++++++++++----------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index ea17d0da..3b12095f 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -216,6 +216,10 @@ class OpenAIClient(LLMClientBase):
             # NOTE: the reasoners that don't support temperature require 1.0, not None
             temperature=llm_config.temperature if supports_temperature_param(model) else 1.0,
         )
+
+        if llm_config.frequency_penalty is not None:
+            data.frequency_penalty = llm_config.frequency_penalty
+
         if tools and supports_parallel_tool_calling(model):
             data.parallel_tool_calls = False
 
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index ab024708..cccf93b0 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -77,6 +77,10 @@ class LLMConfig(BaseModel):
     max_reasoning_tokens: int = Field(
         0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
     )
+    frequency_penalty: Optional[float] = Field(
+        None,  # Can also default to 0.0?
+        description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
+    )
 
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
diff --git a/letta/schemas/providers.py b/letta/schemas/providers.py
index 9e741dd8..51d988bc 100644
--- a/letta/schemas/providers.py
+++ b/letta/schemas/providers.py
@@ -324,18 +324,25 @@ class OpenAIProvider(Provider):
             else:
                 handle = self.get_handle(model_name)
 
-            configs.append(
-                LLMConfig(
-                    model=model_name,
-                    model_endpoint_type="openai",
-                    model_endpoint=self.base_url,
-                    context_window=context_window_size,
-                    handle=handle,
-                    provider_name=self.name,
-                    provider_category=self.provider_category,
-                )
+            llm_config = LLMConfig(
+                model=model_name,
+                model_endpoint_type="openai",
+                model_endpoint=self.base_url,
+                context_window=context_window_size,
+                handle=handle,
+                provider_name=self.name,
+                provider_category=self.provider_category,
             )
 
+            # gpt-4o-mini has started to regress with pretty bad emoji spam loops
+            # this is to counteract that
+            if "gpt-4o-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+            if "gpt-4.1-mini" in model_name:
+                llm_config.frequency_penalty = 1.0
+
+            configs.append(llm_config)
+
             # for OpenAI, sort in reverse order
             if self.base_url == "https://api.openai.com/v1":
                 # alphnumeric sort