fix: patch o1 and o3-mini (#1657)
This commit is contained in:
@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
|
||||
tool_choice=tool_choice,
|
||||
user=str(user_id),
|
||||
max_completion_tokens=llm_config.max_tokens,
|
||||
temperature=llm_config.temperature,
|
||||
temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
|
||||
)
|
||||
else:
|
||||
data = ChatCompletionRequest(
|
||||
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
|
||||
function_call=function_call,
|
||||
user=str(user_id),
|
||||
max_completion_tokens=llm_config.max_tokens,
|
||||
temperature=llm_config.temperature,
|
||||
temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
|
||||
)
|
||||
# https://platform.openai.com/docs/guides/text-generation/json-mode
|
||||
# only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
|
||||
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
|
||||
tool.function = FunctionSchema(**structured_output_version)
|
||||
except ValueError as e:
|
||||
warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
|
||||
# except ValueError as e:
|
||||
# warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
|
||||
|
||||
if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
|
||||
data.pop("parallel_tool_calls", None)
|
||||
return data
|
||||
|
||||
@@ -74,6 +74,13 @@ class LLMConfig(BaseModel):
|
||||
# FIXME hack to silence pydantic protected namespace warning
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
@model_validator(mode="before")
@classmethod
def set_default_enable_reasoner(cls, values):
    """Turn on ``enable_reasoner`` for OpenAI reasoning models.

    Runs as a "before" validator, i.e. on the raw input prior to field
    validation. When the raw ``model`` name contains ``"o3-mini"`` or
    ``"o1"`` (substring match), ``enable_reasoner`` is set to ``True`` in
    the input mapping, overwriting any value the caller supplied.

    Args:
        values: Raw input handed to the model. Only ``dict`` input is
            inspected; anything else is passed through untouched.

    Returns:
        The same ``values`` object, possibly with ``enable_reasoner`` set.
    """
    # "before" validators may receive non-dict input; leave such input for
    # pydantic's own validation to handle instead of failing on `.get`.
    if not isinstance(values, dict):
        return values

    # `model` can be absent or explicitly None at this stage; a substring
    # test against None would raise a TypeError, so only inspect real strings.
    model_name = values.get("model")
    if isinstance(model_name, str) and any(
        reasoner_marker in model_name for reasoner_marker in ("o3-mini", "o1")
    ):
        values["enable_reasoner"] = True

    return values
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def set_default_put_inner_thoughts(cls, values):
|
||||
@@ -100,6 +107,9 @@ class LLMConfig(BaseModel):
|
||||
logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
|
||||
if self.put_inner_thoughts_in_kwargs:
|
||||
logger.warning("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
|
||||
elif self.max_reasoning_tokens and not self.enable_reasoner:
|
||||
logger.warning("model will not use reasoning unless enable_reasoner is set to True")
|
||||
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -133,6 +133,7 @@ class ChatCompletionRequest(BaseModel):
|
||||
temperature: Optional[float] = 1
|
||||
top_p: Optional[float] = 1
|
||||
user: Optional[str] = None # unique ID of the end-user (for monitoring)
|
||||
parallel_tool_calls: Optional[bool] = False
|
||||
|
||||
# function-calling related
|
||||
tools: Optional[List[Tool]] = None
|
||||
|
||||
Reference in New Issue
Block a user