Files
letta-server/letta/llm_api/chatgpt_oauth_client.py
Kian Jones d48932bdb6 fix(core): sanitize Unicode surrogates in all LLM client requests (#9323)
Multiple OpenAI-compatible LLM clients (Azure, Deepseek, Groq, Together, XAI, ZAI)
and Anthropic-compatible clients (Anthropic, MiniMax, Google Vertex) were overriding
request_async/stream_async without calling sanitize_unicode_surrogates, causing
UnicodeEncodeError when message content contained lone UTF-16 surrogates.

Root cause: Child classes override parent methods but omit the sanitization step that
the base OpenAIClient includes. This allows corrupted Unicode (unpaired surrogates
from malformed emoji) to reach the httpx layer, which rejects it during UTF-8 encoding.

Fix: Import and call sanitize_unicode_surrogates in all overridden request methods.
Also removed duplicate sanitize_unicode_surrogates definition from openai_client.py
that shadowed the canonical implementation in letta.helpers.json_helpers.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

Issue-ID: 10c0f2e4-f87b-11f0-b91c-da7ad0900000
2026-02-24 10:52:06 -08:00

1061 lines
42 KiB
Python

"""ChatGPT OAuth Client - handles requests to chatgpt.com/backend-api/codex/responses."""
import json
from typing import Any, AsyncIterator, Callable, Dict, List, Optional, Union
import httpx
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.responses import (
Response,
ResponseCompletedEvent,
ResponseContentPartAddedEvent,
ResponseContentPartDoneEvent,
ResponseCreatedEvent,
ResponseFunctionCallArgumentsDeltaEvent,
ResponseFunctionCallArgumentsDoneEvent,
ResponseFunctionToolCall,
ResponseInProgressEvent,
ResponseOutputItemAddedEvent,
ResponseOutputItemDoneEvent,
ResponseOutputMessage,
ResponseOutputText,
ResponseReasoningItem,
ResponseReasoningSummaryPartAddedEvent,
ResponseReasoningSummaryPartDoneEvent,
ResponseReasoningSummaryTextDeltaEvent,
ResponseReasoningSummaryTextDoneEvent,
ResponseTextDeltaEvent,
ResponseTextDoneEvent,
)
from openai.types.responses.response_stream_event import ResponseStreamEvent
from letta.errors import (
ContextWindowExceededError,
ErrorCode,
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMRateLimitError,
LLMServerError,
LLMTimeoutError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.llm_client_base import LLMClientBase
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.enums import AgentType, ProviderCategory
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.openai.chat_completion_response import (
ChatCompletionResponse,
Choice,
FunctionCall,
Message as ChoiceMessage,
ToolCall,
UsageStatistics,
)
from letta.schemas.providers.chatgpt_oauth import ChatGPTOAuthCredentials, ChatGPTOAuthProvider
from letta.schemas.usage import LettaUsageStatistics
logger = get_logger(__name__)
# ChatGPT Backend API endpoint
CHATGPT_CODEX_ENDPOINT = "https://chatgpt.com/backend-api/codex/responses"
class AsyncStreamWrapper:
    """Adapt a raw async generator to the async context-manager protocol.

    The OpenAI SDK's AsyncStream implements __aenter__ and __aexit__, but our
    custom SSE handler only produces an async generator. Wrapping it here lets
    callers use the same ``async with`` / ``async for`` pattern for both.
    """

    def __init__(self, generator: AsyncIterator[ResponseStreamEvent]):
        self._generator = generator

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Best-effort cleanup: only close when the wrapped object exposes aclose().
        aclose = getattr(self._generator, "aclose", None)
        if aclose is not None:
            await aclose()
        return False

    def __aiter__(self):
        return self

    async def __anext__(self) -> ResponseStreamEvent:
        return await self._generator.__anext__()
class ChatGPTOAuthClient(LLMClientBase):
"""
LLM client for ChatGPT OAuth provider.
This client:
1. Transforms standard OpenAI chat format to ChatGPT backend Responses API format
2. Adds required headers (Authorization, ChatGPT-Account-Id, OpenAI-Beta, OpenAI-Originator)
3. Makes requests to chatgpt.com/backend-api/codex/responses
4. Transforms responses back to OpenAI ChatCompletion format
"""
@trace_method
async def _get_provider_and_credentials_async(self, llm_config: LLMConfig) -> tuple[ChatGPTOAuthProvider, ChatGPTOAuthCredentials]:
    """Resolve the ChatGPT OAuth provider and a valid set of credentials.

    Tokens are refreshed automatically if stale.

    Args:
        llm_config: The LLM configuration containing provider info.

    Returns:
        Tuple of (provider, credentials).

    Raises:
        ValueError: If the config is not a BYOK provider.
        LLMAuthenticationError: If credentials cannot be obtained.
    """
    from letta.services.provider_manager import ProviderManager

    if llm_config.provider_category != ProviderCategory.byok:
        raise ValueError("ChatGPT OAuth requires BYOK provider credentials")

    # Look up the named BYOK provider for this actor.
    matches = await ProviderManager().list_providers_async(
        name=llm_config.provider_name,
        actor=self.actor,
        provider_category=[ProviderCategory.byok],
    )
    if not matches:
        raise LLMAuthenticationError(
            message=f"ChatGPT OAuth provider '{llm_config.provider_name}' not found",
            code=ErrorCode.UNAUTHENTICATED,
        )
    provider: ChatGPTOAuthProvider = matches[0].cast_to_subtype()

    # Refresh the token if needed; the actor is passed so refreshed
    # credentials can be persisted.
    credentials = await provider.refresh_token_if_needed(actor=self.actor)
    if not credentials:
        raise LLMAuthenticationError(
            message="Failed to obtain valid ChatGPT OAuth credentials",
            code=ErrorCode.UNAUTHENTICATED,
        )
    return provider, credentials
def _build_headers(self, creds: ChatGPTOAuthCredentials) -> Dict[str, str]:
    """Assemble the HTTP headers the ChatGPT backend API requires.

    Args:
        creds: OAuth credentials containing access_token and account_id.

    Returns:
        Dictionary of HTTP headers.
    """
    # Static headers required by the Codex responses endpoint.
    headers = {
        "Authorization": f"Bearer {creds.access_token}",
        "ChatGPT-Account-Id": creds.account_id,
    }
    headers["OpenAI-Beta"] = "responses=v1"
    headers["OpenAI-Originator"] = "codex"
    headers["Content-Type"] = "application/json"
    headers["accept"] = "text/event-stream"
    return headers
@trace_method
def build_request_data(
    self,
    agent_type: AgentType,
    messages: List[PydanticMessage],
    llm_config: LLMConfig,
    tools: Optional[List[dict]] = None,
    force_tool_call: Optional[str] = None,
    requires_subsequent_tool_call: bool = False,
    tool_return_truncation_chars: Optional[int] = None,
) -> dict:
    """Build a ChatGPT-backend request payload in Responses API format.

    The ChatGPT backend speaks the OpenAI Responses API:
    - an ``input`` array instead of ``messages``
    - ``role: "developer"`` instead of ``role: "system"``
    - structured content arrays

    The first developer message is lifted out of ``input`` and sent as the
    top-level ``instructions`` field; later developer messages stay in ``input``.
    """
    input_messages = PydanticMessage.to_openai_responses_dicts_from_list(
        messages,
        tool_return_truncation_chars=tool_return_truncation_chars,
    )

    # Pull the first developer message out as `instructions`.
    instructions = None
    filtered_input = []
    for msg in input_messages:
        if msg.get("role") == "developer" and instructions is None:
            content = msg.get("content", [])
            if isinstance(content, list) and content:
                instructions = content[0].get("text", "")
            elif isinstance(content, str):
                instructions = content
            # The first developer message is consumed either way.
            continue
        filtered_input.append(msg)

    # Base payload for the ChatGPT backend.
    data: Dict[str, Any] = {
        "model": llm_config.model,
        "input": filtered_input,
        "store": False,  # Required for stateless operation
        "stream": True,  # ChatGPT backend requires streaming
    }
    if instructions:
        data["instructions"] = instructions

    if tools:
        # Responses API takes a flat tool schema (no nested "function" key).
        data["tools"] = [
            {
                "type": "function",
                "name": t.get("name"),
                "description": t.get("description"),
                "parameters": t.get("parameters"),
            }
            for t in tools
        ]
        if force_tool_call is not None:
            data["tool_choice"] = {"type": "function", "name": force_tool_call}
        elif requires_subsequent_tool_call:
            data["tool_choice"] = "required"
        else:
            data["tool_choice"] = "auto"

    # Note: ChatGPT backend does NOT support max_output_tokens parameter

    # Reasoning models (GPT-5.x, o-series) accept an effort + summary config.
    if self.is_reasoning_model(llm_config) and llm_config.reasoning_effort:
        data["reasoning"] = {
            "effort": llm_config.reasoning_effort,
            "summary": "detailed",
        }
    return data
def _transform_response_from_chatgpt_backend(self, response_data: dict) -> dict:
    """Transform ChatGPT backend response to standard OpenAI ChatCompletion format.

    The ChatGPT backend returns responses in Responses API format.
    This method normalizes them to ChatCompletion format.

    Args:
        response_data: Raw response from ChatGPT backend.

    Returns:
        Response in OpenAI ChatCompletion format.
    """
    # If response is already in ChatCompletion format, return as-is
    if "choices" in response_data:
        return response_data

    # Extract from Responses API format
    message_content = ""
    tool_calls = None
    for item in response_data.get("output", []):
        item_type = item.get("type")
        if item_type == "message":
            for part in item.get("content", []):
                if part.get("type") in ("output_text", "text"):
                    message_content += part.get("text", "")
                elif part.get("type") == "refusal":
                    message_content += part.get("refusal", "")
        elif item_type == "function_call":
            if tool_calls is None:
                tool_calls = []
            tool_calls.append(
                {
                    "id": item.get("call_id", item.get("id", "")),
                    "type": "function",
                    "function": {
                        "name": item.get("name", ""),
                        "arguments": item.get("arguments", ""),
                    },
                }
            )
        # NOTE: "reasoning" items (thinking summaries) are intentionally
        # dropped — the ChatCompletion format has no field for them. A
        # previous version accumulated them into a local that was never
        # read; that dead work has been removed.

    # Build ChatCompletion response; tool calls take precedence for finish_reason.
    finish_reason = "tool_calls" if tool_calls else "stop"
    return {
        "id": response_data.get("id", "chatgpt-response"),
        "object": "chat.completion",
        "created": response_data.get("created_at", 0),
        "model": response_data.get("model", ""),
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": message_content or None,
                    "tool_calls": tool_calls,
                },
                "finish_reason": finish_reason,
            }
        ],
        "usage": self._transform_usage(response_data.get("usage", {})),
    }
def _transform_usage(self, usage: dict) -> dict:
    """Map Responses API usage fields onto ChatCompletion usage fields."""
    prompt = usage.get("input_tokens", 0)
    completion = usage.get("output_tokens", 0)
    return {
        "prompt_tokens": prompt,
        "completion_tokens": completion,
        "total_tokens": prompt + completion,
    }
@trace_method
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
    """Synchronous request - not recommended for ChatGPT OAuth.

    Blocks on a fresh event loop via asyncio.run; this raises RuntimeError
    when called from a thread that already has a running loop, so prefer
    request_async in async contexts.
    """
    import asyncio

    return asyncio.run(self.request_async(request_data, llm_config))
@trace_method
async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
    """Make asynchronous request to ChatGPT backend API.

    Args:
        request_data: Request payload in Responses API format.
        llm_config: LLM configuration.

    Returns:
        Response data in OpenAI ChatCompletion format.
    """
    # Strip lone UTF-16 surrogates before httpx UTF-8 encodes the payload.
    request_data = sanitize_unicode_surrogates(request_data)
    _provider, creds = await self._get_provider_and_credentials_async(llm_config)
    request_headers = self._build_headers(creds)
    target_url = llm_config.model_endpoint or CHATGPT_CODEX_ENDPOINT

    # The backend always answers over SSE, so even a "non-streaming" call
    # streams the events and folds them into one response.
    async with httpx.AsyncClient() as client:
        try:
            async with client.stream(
                "POST",
                target_url,
                json=request_data,
                headers=request_headers,
                timeout=120.0,
            ) as response:
                response.raise_for_status()
                return await self._accumulate_sse_response(response)
        except httpx.HTTPStatusError as e:
            raise self._handle_http_error(e)
        except httpx.TimeoutException:
            raise LLMTimeoutError(
                message="ChatGPT backend request timed out",
                code=ErrorCode.TIMEOUT,
            )
        except httpx.RequestError as e:
            raise LLMConnectionError(
                message=f"Failed to connect to ChatGPT backend: {str(e)}",
                code=ErrorCode.INTERNAL_SERVER_ERROR,
            )
async def _accumulate_sse_response(self, response: httpx.Response) -> dict:
    """Accumulate SSE stream into a final response.

    ChatGPT backend may return SSE even for non-streaming requests.
    This method accumulates all events into a single response in OpenAI
    ChatCompletion format.

    Args:
        response: httpx Response object with SSE content.

    Returns:
        Accumulated response data.
    """
    accumulated_content = ""
    accumulated_tool_calls: List[Dict[str, Any]] = []
    model = ""
    response_id = ""
    usage = {}
    async for line in response.aiter_lines():
        # SSE data lines start with "data: "; everything else (event:, comments,
        # blank keep-alives) is ignored.
        if not line.startswith("data: "):
            continue
        data_str = line[6:]  # Remove "data: " prefix
        if data_str == "[DONE]":
            break
        try:
            event = json.loads(data_str)
        except json.JSONDecodeError:
            # Skip malformed payloads rather than failing the whole request.
            continue
        # Extract response metadata: first id/model seen wins; usage is
        # overwritten so the last reported value is kept.
        if not response_id and event.get("id"):
            response_id = event["id"]
        if not model and event.get("model"):
            model = event["model"]
        if event.get("usage"):
            usage = event["usage"]
        # Handle different event types
        event_type = event.get("type")
        if event_type == "response.output_item.done":
            item = event.get("item", {})
            item_type = item.get("type")
            if item_type == "message":
                for content in item.get("content", []):
                    if content.get("type") in ("output_text", "text"):
                        accumulated_content += content.get("text", "")
            elif item_type == "function_call":
                accumulated_tool_calls.append(
                    {
                        "id": item.get("call_id", item.get("id", "")),
                        "type": "function",
                        "function": {
                            "name": item.get("name", ""),
                            "arguments": item.get("arguments", ""),
                        },
                    }
                )
        elif event_type == "response.content_part.delta":
            delta = event.get("delta", {})
            if delta.get("type") == "text_delta":
                accumulated_content += delta.get("text", "")
        elif event_type == "response.done":
            # Final response event
            if event.get("response", {}).get("usage"):
                usage = event["response"]["usage"]
    # Build final response in ChatCompletion shape.
    finish_reason = "stop" if not accumulated_tool_calls else "tool_calls"
    return {
        "id": response_id or "chatgpt-response",
        "object": "chat.completion",
        "created": 0,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": accumulated_content or None,
                    "tool_calls": accumulated_tool_calls if accumulated_tool_calls else None,
                },
                "finish_reason": finish_reason,
            }
        ],
        "usage": self._transform_usage(usage),
    }
@trace_method
async def request_embeddings(
    self,
    texts: List[str],
    embedding_config,
) -> List[List[float]]:
    """ChatGPT backend does not support embeddings.

    Raises:
        NotImplementedError: Always; the Codex responses endpoint has no
            embeddings capability.
    """
    raise NotImplementedError("ChatGPT OAuth does not support embeddings")
@trace_method
async def convert_response_to_chat_completion(
    self,
    response_data: dict,
    input_messages: List[PydanticMessage],
    llm_config: LLMConfig,
) -> ChatCompletionResponse:
    """Validate an already-normalized response dict into a ChatCompletionResponse.

    Args:
        response_data: Response data (already in ChatCompletion format).
        input_messages: Original input messages (unused here; interface parity).
        llm_config: LLM configuration (unused here; interface parity).

    Returns:
        ChatCompletionResponse object.
    """
    # The dict was normalized upstream, so pydantic validation is all that remains.
    completion = ChatCompletionResponse(**response_data)
    return completion
def extract_usage_statistics(self, response_data: dict | None, llm_config: LLMConfig) -> LettaUsageStatistics:
    """Extract usage statistics from ChatGPT OAuth response and return as LettaUsageStatistics."""
    # Missing response or missing/empty usage both yield zeroed statistics.
    usage = (response_data or {}).get("usage")
    if not usage:
        return LettaUsageStatistics()
    prompt_tokens = usage.get("prompt_tokens") or 0
    completion_tokens = usage.get("completion_tokens") or 0
    # Trust the backend total when present; otherwise derive it.
    total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens)
    return LettaUsageStatistics(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=total_tokens,
    )
@trace_method
async def stream_async(
    self,
    request_data: dict,
    llm_config: LLMConfig,
) -> AsyncStreamWrapper:
    """Stream response from ChatGPT backend.

    Note: ChatGPT backend uses SSE by default. This returns a custom
    async generator that yields ResponseStreamEvent objects compatible
    with the OpenAI SDK, wrapped so it supports ``async with``.

    Args:
        request_data: Request payload.
        llm_config: LLM configuration.

    Returns:
        AsyncStreamWrapper around an async generator yielding
        ResponseStreamEvent objects.
    """
    # Strip lone UTF-16 surrogates before the payload is UTF-8 encoded.
    request_data = sanitize_unicode_surrogates(request_data)
    _, creds = await self._get_provider_and_credentials_async(llm_config)
    headers = self._build_headers(creds)
    endpoint = llm_config.model_endpoint or CHATGPT_CODEX_ENDPOINT

    async def stream_generator():
        event_count = 0
        # Track output item index for proper event construction
        output_index = 0
        # Track sequence_number in case backend doesn't provide it
        # (OpenAI SDK expects incrementing sequence numbers starting at 0)
        sequence_counter = 0
        async with httpx.AsyncClient() as client:
            async with client.stream(
                "POST",
                endpoint,
                json=request_data,
                headers=headers,
                timeout=120.0,
            ) as response:
                # Check for error status before consuming the SSE body.
                if response.status_code != 200:
                    error_body = await response.aread()
                    logger.error(f"ChatGPT SSE error: {response.status_code} - {error_body}")
                    raise self._handle_http_error_from_status(response.status_code, error_body.decode())
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    data_str = line[6:]
                    if data_str == "[DONE]":
                        break
                    try:
                        raw_event = json.loads(data_str)
                        event_type = raw_event.get("type")
                        event_count += 1
                        # Use backend-provided sequence_number if available, else use counter
                        # This ensures proper ordering even if backend doesn't provide it
                        if "sequence_number" not in raw_event:
                            raw_event["sequence_number"] = sequence_counter
                        sequence_counter = raw_event["sequence_number"] + 1
                        # Track output index for output_item.added events
                        if event_type == "response.output_item.added":
                            output_index = raw_event.get("output_index", output_index)
                        # Convert to OpenAI SDK ResponseStreamEvent; unhandled
                        # event types come back as None and are not yielded.
                        sdk_event = self._convert_to_sdk_event(raw_event, output_index)
                        if sdk_event:
                            yield sdk_event
                    except json.JSONDecodeError:
                        logger.warning(f"Failed to parse SSE event: {data_str[:100]}")
                        continue

    # Wrap the async generator in AsyncStreamWrapper to provide context manager protocol
    return AsyncStreamWrapper(stream_generator())
def _convert_to_sdk_event(
    self,
    raw_event: dict,
    output_index: int = 0,
) -> Optional[ResponseStreamEvent]:
    """Convert raw ChatGPT backend SSE event to OpenAI SDK ResponseStreamEvent.

    Uses model_construct() to bypass validation since ChatGPT backend doesn't
    provide all fields required by OpenAI SDK (e.g., sequence_number).
    One branch per SSE event type; event types with no mapping (and output
    items of unknown type) return None so the caller can skip them.

    Args:
        raw_event: Raw SSE event data from ChatGPT backend.
        output_index: Current output item index.

    Returns:
        OpenAI SDK ResponseStreamEvent or None if event type not handled.
    """
    event_type = raw_event.get("type")
    response_id = raw_event.get("response_id", "")
    seq_num = raw_event.get("sequence_number", 0)
    # response.created -> ResponseCreatedEvent
    if event_type == "response.created":
        response_data = raw_event.get("response", {})
        return ResponseCreatedEvent.model_construct(
            type="response.created",
            sequence_number=seq_num,
            response=Response.model_construct(
                id=response_data.get("id", response_id),
                created_at=response_data.get("created_at", 0),
                model=response_data.get("model", ""),
                object="response",
                output=[],
                status=response_data.get("status", "in_progress"),
                parallel_tool_calls=response_data.get("parallel_tool_calls", True),
            ),
        )
    # response.in_progress -> ResponseInProgressEvent
    elif event_type == "response.in_progress":
        response_data = raw_event.get("response", {})
        return ResponseInProgressEvent.model_construct(
            type="response.in_progress",
            sequence_number=seq_num,
            response=Response.model_construct(
                id=response_data.get("id", response_id),
                created_at=response_data.get("created_at", 0),
                model=response_data.get("model", ""),
                object="response",
                output=[],
                status="in_progress",
                parallel_tool_calls=response_data.get("parallel_tool_calls", True),
            ),
        )
    # response.output_item.added -> ResponseOutputItemAddedEvent
    elif event_type == "response.output_item.added":
        item_data = raw_event.get("item", {})
        item_type = item_data.get("type")
        idx = raw_event.get("output_index", output_index)
        # Newly-added items start life with an empty body; content/arguments
        # arrive via later delta events.
        if item_type == "message":
            item = ResponseOutputMessage.model_construct(
                id=item_data.get("id", ""),
                type="message",
                role=item_data.get("role", "assistant"),
                content=[],
                status=item_data.get("status", "in_progress"),
            )
        elif item_type == "function_call":
            item = ResponseFunctionToolCall.model_construct(
                id=item_data.get("id", ""),
                type="function_call",
                call_id=item_data.get("call_id", ""),
                name=item_data.get("name", ""),
                arguments=item_data.get("arguments", ""),
                status=item_data.get("status", "in_progress"),
            )
        elif item_type == "reasoning":
            # Reasoning item for o-series, GPT-5 models
            item = ResponseReasoningItem.model_construct(
                id=item_data.get("id", ""),
                type="reasoning",
                summary=item_data.get("summary", []),
                status=item_data.get("status", "in_progress"),
            )
        else:
            # Unknown item type, skip
            return None
        return ResponseOutputItemAddedEvent.model_construct(
            type="response.output_item.added",
            sequence_number=seq_num,
            output_index=idx,
            item=item,
        )
    # response.content_part.added -> ResponseContentPartAddedEvent
    elif event_type == "response.content_part.added":
        part_data = raw_event.get("part", {})
        return ResponseContentPartAddedEvent.model_construct(
            type="response.content_part.added",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            content_index=raw_event.get("content_index", 0),
            part=ResponseOutputText.model_construct(
                type="output_text",
                text=part_data.get("text", ""),
                annotations=[],
            ),
        )
    # response.output_text.delta -> ResponseTextDeltaEvent
    # Note: OpenAI SDK uses "response.output_text.delta" (matching ChatGPT backend)
    elif event_type == "response.output_text.delta":
        return ResponseTextDeltaEvent.model_construct(
            type="response.output_text.delta",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            content_index=raw_event.get("content_index", 0),
            delta=raw_event.get("delta", ""),
        )
    # response.output_text.done -> ResponseTextDoneEvent
    elif event_type == "response.output_text.done":
        return ResponseTextDoneEvent.model_construct(
            type="response.output_text.done",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            content_index=raw_event.get("content_index", 0),
            text=raw_event.get("text", ""),
        )
    # response.function_call_arguments.delta -> ResponseFunctionCallArgumentsDeltaEvent
    elif event_type == "response.function_call_arguments.delta":
        return ResponseFunctionCallArgumentsDeltaEvent.model_construct(
            type="response.function_call_arguments.delta",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            call_id=raw_event.get("call_id", ""),
            delta=raw_event.get("delta", ""),
        )
    # response.function_call_arguments.done -> ResponseFunctionCallArgumentsDoneEvent
    elif event_type == "response.function_call_arguments.done":
        return ResponseFunctionCallArgumentsDoneEvent.model_construct(
            type="response.function_call_arguments.done",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            call_id=raw_event.get("call_id", ""),
            arguments=raw_event.get("arguments", ""),
        )
    # response.content_part.done -> ResponseContentPartDoneEvent
    elif event_type == "response.content_part.done":
        part_data = raw_event.get("part", {})
        return ResponseContentPartDoneEvent.model_construct(
            type="response.content_part.done",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            content_index=raw_event.get("content_index", 0),
            part=ResponseOutputText.model_construct(
                type="output_text",
                text=part_data.get("text", ""),
                annotations=[],
            ),
        )
    # response.output_item.done -> ResponseOutputItemDoneEvent
    elif event_type == "response.output_item.done":
        item_data = raw_event.get("item", {})
        item_type = item_data.get("type")
        idx = raw_event.get("output_index", output_index)
        if item_type == "message":
            # Build content from item data
            content_list = []
            for c in item_data.get("content", []):
                if c.get("type") in ("output_text", "text"):
                    content_list.append(
                        ResponseOutputText.model_construct(
                            type="output_text",
                            text=c.get("text", ""),
                            annotations=[],
                        )
                    )
            item = ResponseOutputMessage.model_construct(
                id=item_data.get("id", ""),
                type="message",
                role=item_data.get("role", "assistant"),
                content=content_list,
                status=item_data.get("status", "completed"),
            )
        elif item_type == "function_call":
            item = ResponseFunctionToolCall.model_construct(
                id=item_data.get("id", ""),
                type="function_call",
                call_id=item_data.get("call_id", ""),
                name=item_data.get("name", ""),
                arguments=item_data.get("arguments", ""),
                status=item_data.get("status", "completed"),
            )
        elif item_type == "reasoning":
            # Build summary from item data (raw dicts are fine with model_construct)
            summary_list = item_data.get("summary", [])
            item = ResponseReasoningItem.model_construct(
                id=item_data.get("id", ""),
                type="reasoning",
                summary=summary_list,
                status=item_data.get("status", "completed"),
            )
        else:
            return None
        return ResponseOutputItemDoneEvent.model_construct(
            type="response.output_item.done",
            sequence_number=seq_num,
            output_index=idx,
            item=item,
        )
    # response.completed or response.done -> ResponseCompletedEvent
    # (both backend spellings map to the SDK's "response.completed")
    elif event_type in ("response.completed", "response.done"):
        response_data = raw_event.get("response", {})
        # Build output items from response data
        output_items = []
        for out in response_data.get("output", []):
            out_type = out.get("type")
            if out_type == "message":
                content_list = []
                for c in out.get("content", []):
                    if c.get("type") in ("output_text", "text"):
                        content_list.append(
                            ResponseOutputText.model_construct(
                                type="output_text",
                                text=c.get("text", ""),
                                annotations=[],
                            )
                        )
                output_items.append(
                    ResponseOutputMessage.model_construct(
                        id=out.get("id", ""),
                        type="message",
                        role=out.get("role", "assistant"),
                        content=content_list,
                        status=out.get("status", "completed"),
                    )
                )
            elif out_type == "function_call":
                output_items.append(
                    ResponseFunctionToolCall.model_construct(
                        id=out.get("id", ""),
                        type="function_call",
                        call_id=out.get("call_id", ""),
                        name=out.get("name", ""),
                        arguments=out.get("arguments", ""),
                        status=out.get("status", "completed"),
                    )
                )
        return ResponseCompletedEvent.model_construct(
            type="response.completed",
            sequence_number=seq_num,
            response=Response.model_construct(
                id=response_data.get("id", response_id),
                created_at=response_data.get("created_at", 0),
                model=response_data.get("model", ""),
                object="response",
                output=output_items,
                status=response_data.get("status", "completed"),
                parallel_tool_calls=response_data.get("parallel_tool_calls", True),
                usage=response_data.get("usage"),
            ),
        )
    # Reasoning events (for o-series, GPT-5 models)
    # response.reasoning_summary_part.added -> ResponseReasoningSummaryPartAddedEvent
    elif event_type == "response.reasoning_summary_part.added":
        part_data = raw_event.get("part", {})
        # Use a simple dict for Part since we use model_construct
        part = {"text": part_data.get("text", ""), "type": part_data.get("type", "summary_text")}
        return ResponseReasoningSummaryPartAddedEvent.model_construct(
            type="response.reasoning_summary_part.added",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            summary_index=raw_event.get("summary_index", 0),
            part=part,
        )
    # response.reasoning_summary_text.delta -> ResponseReasoningSummaryTextDeltaEvent
    elif event_type == "response.reasoning_summary_text.delta":
        return ResponseReasoningSummaryTextDeltaEvent.model_construct(
            type="response.reasoning_summary_text.delta",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            summary_index=raw_event.get("summary_index", 0),
            delta=raw_event.get("delta", ""),
        )
    # response.reasoning_summary_text.done -> ResponseReasoningSummaryTextDoneEvent
    elif event_type == "response.reasoning_summary_text.done":
        return ResponseReasoningSummaryTextDoneEvent.model_construct(
            type="response.reasoning_summary_text.done",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            summary_index=raw_event.get("summary_index", 0),
            text=raw_event.get("text", ""),
        )
    # response.reasoning_summary_part.done -> ResponseReasoningSummaryPartDoneEvent
    elif event_type == "response.reasoning_summary_part.done":
        part_data = raw_event.get("part", {})
        part = {"text": part_data.get("text", ""), "type": part_data.get("type", "summary_text")}
        return ResponseReasoningSummaryPartDoneEvent.model_construct(
            type="response.reasoning_summary_part.done",
            sequence_number=seq_num,
            item_id=raw_event.get("item_id", ""),
            output_index=raw_event.get("output_index", output_index),
            summary_index=raw_event.get("summary_index", 0),
            part=part,
        )
    # Unhandled event types - log for debugging
    logger.debug(f"Unhandled SSE event type: {event_type}")
    return None
def _handle_http_error_from_status(self, status_code: int, error_body: str) -> Exception:
    """Create appropriate exception from HTTP status code.

    Args:
        status_code: HTTP status code.
        error_body: Error response body.

    Returns:
        Appropriate LLM exception.
    """
    # Guard-clause dispatch: most specific codes first, generic 4xx last.
    if status_code == 401:
        return LLMAuthenticationError(
            message=f"ChatGPT authentication failed: {error_body}",
            code=ErrorCode.UNAUTHENTICATED,
        )
    if status_code == 429:
        return LLMRateLimitError(
            message=f"ChatGPT rate limit exceeded: {error_body}",
            code=ErrorCode.RATE_LIMIT_EXCEEDED,
        )
    if status_code >= 500:
        return LLMServerError(
            message=f"ChatGPT server error: {error_body}",
            code=ErrorCode.INTERNAL_SERVER_ERROR,
        )
    return LLMBadRequestError(
        message=f"ChatGPT request failed ({status_code}): {error_body}",
        code=ErrorCode.INTERNAL_SERVER_ERROR,
    )
def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
    """Check if model is a reasoning model.

    Args:
        llm_config: LLM configuration.

    Returns:
        True if model supports extended reasoning.
    """
    # Substring match against the known reasoning-model families.
    name = llm_config.model.lower()
    return any(marker in name for marker in ("o1", "o3", "o4", "gpt-5"))
@trace_method
def handle_llm_error(self, e: Exception) -> Exception:
    """Map ChatGPT-specific errors to common LLMError types.

    Args:
        e: Original exception.

    Returns:
        Mapped LLMError subclass.
    """
    # Only HTTP status errors get ChatGPT-specific treatment; everything
    # else falls through to the base-class mapping.
    if not isinstance(e, httpx.HTTPStatusError):
        return super().handle_llm_error(e)
    return self._handle_http_error(e)
def _handle_http_error(self, e: httpx.HTTPStatusError) -> Exception:
    """Handle HTTP status errors from ChatGPT backend.

    Args:
        e: HTTP status error.

    Returns:
        Appropriate LLMError subclass.
    """
    status_code = e.response.status_code
    # Prefer the structured message from the JSON error body; fall back to
    # the exception text when the body is not JSON (or not dict-shaped).
    try:
        error_message = e.response.json().get("error", {}).get("message", str(e))
    except Exception:
        error_message = str(e)

    if status_code == 401:
        return LLMAuthenticationError(
            message=f"ChatGPT authentication failed: {error_message}",
            code=ErrorCode.UNAUTHENTICATED,
        )
    if status_code == 429:
        return LLMRateLimitError(
            message=f"ChatGPT rate limit exceeded: {error_message}",
            code=ErrorCode.RATE_LIMIT_EXCEEDED,
        )
    if status_code == 400:
        # Heuristic: context/token wording on a 400 means the prompt was too big.
        if "context" in error_message.lower() or "token" in error_message.lower():
            return ContextWindowExceededError(
                message=f"ChatGPT context window exceeded: {error_message}",
            )
        return LLMBadRequestError(
            message=f"ChatGPT bad request: {error_message}",
            code=ErrorCode.INVALID_ARGUMENT,
        )
    if status_code >= 500:
        return LLMServerError(
            message=f"ChatGPT server error: {error_message}",
            code=ErrorCode.INTERNAL_SERVER_ERROR,
        )
    return LLMBadRequestError(
        message=f"ChatGPT request failed ({status_code}): {error_message}",
        code=ErrorCode.INTERNAL_SERVER_ERROR,
    )