fix: patch o1 and o3-mini (#1657)

This commit is contained in:
Kevin Lin
2025-04-10 13:50:38 -07:00
committed by GitHub
parent 63a6c6b499
commit 1453ba47c3
3 changed files with 15 additions and 3 deletions

View File

@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
tool_choice=tool_choice,
user=str(user_id),
max_completion_tokens=llm_config.max_tokens,
temperature=llm_config.temperature,
temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
)
else:
data = ChatCompletionRequest(
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
function_call=function_call,
user=str(user_id),
max_completion_tokens=llm_config.max_tokens,
temperature=llm_config.temperature,
temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
)
# https://platform.openai.com/docs/guides/text-generation/json-mode
# only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
tool.function = FunctionSchema(**structured_output_version)
except ValueError as e:
warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
return data
@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
# except ValueError as e:
# warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
data.pop("parallel_tool_calls", None)
return data

View File

@@ -74,6 +74,13 @@ class LLMConfig(BaseModel):
# FIXME hack to silence pydantic protected namespace warning
model_config = ConfigDict(protected_namespaces=())
@model_validator(mode="before")
@classmethod
def set_default_enable_reasoner(cls, values):
    """Force ``enable_reasoner`` on when the configured model is an OpenAI reasoner.

    Runs before field validation, so ``values`` is the raw, unvalidated input.
    If the ``model`` entry is a string containing ``"o3-mini"`` or ``"o1"``,
    ``values["enable_reasoner"]`` is set to ``True`` (overriding any value the
    caller supplied).

    Args:
        values: Raw input handed to the model constructor. Usually a dict, but
            a "before" validator may also receive other objects.

    Returns:
        The same ``values`` object, mutated in place when a reasoner model
        name was detected, otherwise untouched.
    """
    # A "before" validator is not guaranteed a dict; leave anything else for
    # pydantic's own validation instead of failing on ``.get``.
    if not isinstance(values, dict):
        return values

    model = values.get("model")
    # ``model`` can be absent, explicitly None, or a non-string at this stage.
    # A substring test on those would raise TypeError, which pydantic does not
    # turn into a validation error, so skip detection and let field validation
    # report the bad value.
    if isinstance(model, str) and any(reasoner_name in model for reasoner_name in ("o3-mini", "o1")):
        values["enable_reasoner"] = True
    return values
@model_validator(mode="before")
@classmethod
def set_default_put_inner_thoughts(cls, values):
@@ -100,6 +107,9 @@ class LLMConfig(BaseModel):
logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
if self.put_inner_thoughts_in_kwargs:
logger.warning("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
elif self.max_reasoning_tokens and not self.enable_reasoner:
logger.warning("model will not use reasoning unless enable_reasoner is set to True")
return self
@classmethod

View File

@@ -133,6 +133,7 @@ class ChatCompletionRequest(BaseModel):
temperature: Optional[float] = 1
top_p: Optional[float] = 1
user: Optional[str] = None # unique ID of the end-user (for monitoring)
parallel_tool_calls: Optional[bool] = False
# function-calling related
tools: Optional[List[Tool]] = None