feat: cleanup cancellation code and add more logging (#6588)

Sarah Wooders
2025-12-10 11:56:12 -08:00
committed by Caren Thomas
parent 70c57c5072
commit c8fa77a01f
7 changed files with 71 additions and 97 deletions
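
Editor's note: all three provider clients touched here apply the same pattern: wrap creation of the streaming request in a try/except, log the exception together with the serialized request payload, then re-raise so the caller's error handling still runs. A minimal sketch of that pattern (the `create_stream` call and the `logger` setup are illustrative stand-ins, not code from this commit):

import json
import logging

logger = logging.getLogger(__name__)


async def stream_with_logging(client, request_data: dict):
    """Wrap a streaming request so failures are logged with their payload."""
    try:
        # `client.create_stream` is a hypothetical stand-in for the provider
        # SDK calls in the hunks below (messages.create, generate_content_stream, ...).
        return await client.create_stream(**request_data)
    except Exception as e:
        # Log the error next to the request body, then re-raise unchanged.
        logger.error(f"Error streaming request: {e} with request data: {json.dumps(request_data)}")
        raise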

View File

@@ -168,7 +168,12 @@ class AnthropicClient(LLMClientBase):
         if hasattr(llm_config, "response_format") and isinstance(llm_config.response_format, JsonSchemaResponseFormat):
             betas.append("structured-outputs-2025-11-13")
-        return await client.beta.messages.create(**request_data, betas=betas)
+        # log failed requests
+        try:
+            return await client.beta.messages.create(**request_data, betas=betas)
+        except Exception as e:
+            logger.error(f"Error streaming Anthropic request: {e} with request data: {json.dumps(request_data)}")
+            raise e
 
     @trace_method
     async def send_llm_batch_request_async(
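
A caveat with this logging pattern: json.dumps(request_data) itself raises TypeError when the payload contains non-JSON-serializable values, which would mask the original streaming error. A defensive variant (an assumption on my part, not what this commit does) stringifies unknown types:

import json


def safe_dump(request_data: dict) -> str:
    # default=str converts anything json cannot encode (e.g. SDK objects),
    # so building the log line can never raise on its own.
    return json.dumps(request_data, default=str)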

View File

@@ -140,11 +140,16 @@ class GoogleVertexClient(LLMClientBase):
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
         client = self._get_client()
-        response = await client.aio.models.generate_content_stream(
-            model=llm_config.model,
-            contents=request_data["contents"],
-            config=request_data["config"],
-        )
+        try:
+            response = await client.aio.models.generate_content_stream(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+        except Exception as e:
+            logger.error(f"Error streaming Google Vertex request: {e} with request data: {json.dumps(request_data)}")
+            raise e
+
         # Direct yield - keeps response alive in generator's local scope throughout iteration
         # This is required because the SDK's connection lifecycle is tied to the response object
         async for chunk in response:
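
The "direct yield" comment above is about object lifetime: the SDK ties the connection to the response object, so the response must stay referenced in the generator's own frame while the caller iterates. A stripped-down sketch of that shape (the names are illustrative, not the Google SDK):

from typing import Any, AsyncIterator


async def stream_chunks(client: Any, request_data: dict) -> AsyncIterator[Any]:
    # `response` lives in this generator's frame for as long as the caller
    # iterates, so the underlying connection is not torn down early.
    response = await client.open_stream(request_data)  # hypothetical SDK call
    async for chunk in response:
        yield chunk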

View File

@@ -1,4 +1,5 @@
 import asyncio
+import json
 import os
 import time
 from typing import Any, List, Optional
@@ -762,17 +763,25 @@ class OpenAIClient(LLMClientBase):
         # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
         if "input" in request_data and "messages" not in request_data:
-            response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
-                **request_data,
-                stream=True,
-                # stream_options={"include_usage": True},
-            )
+            try:
+                response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
+                    **request_data,
+                    stream=True,
+                    # stream_options={"include_usage": True},
+                )
+            except Exception as e:
+                logger.error(f"Error streaming OpenAI Responses request: {e} with request data: {json.dumps(request_data)}")
+                raise e
         else:
-            response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
-                **request_data,
-                stream=True,
-                stream_options={"include_usage": True},
-            )
+            try:
+                response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+                    **request_data,
+                    stream=True,
+                    stream_options={"include_usage": True},
+                )
+            except Exception as e:
+                logger.error(f"Error streaming OpenAI Chat Completions request: {e} with request data: {json.dumps(request_data)}")
+                raise e
         return response_stream
 
     @trace_method
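
For reference, the branch at the top of this hunk routes purely on payload shape, as the inline comment says: the OpenAI Responses API takes an `input` field, while Chat Completions takes `messages`. A small illustration of that predicate (the payloads are made up for the example):

def uses_responses_api(request_data: dict) -> bool:
    # Mirrors the branch condition in the diff above.
    return "input" in request_data and "messages" not in request_data


assert uses_responses_api({"model": "example-model", "input": "Hello"})
assert not uses_responses_api({"model": "example-model", "messages": [{"role": "user", "content": "Hello"}]})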