feat: cleanup cancellation code and add more logging (#6588)
commit c8fa77a01f
parent 70c57c5072
committed by Caren Thomas
@@ -168,7 +168,12 @@ class AnthropicClient(LLMClientBase):
         if hasattr(llm_config, "response_format") and isinstance(llm_config.response_format, JsonSchemaResponseFormat):
             betas.append("structured-outputs-2025-11-13")
 
-        return await client.beta.messages.create(**request_data, betas=betas)
+        # log failed requests
+        try:
+            return await client.beta.messages.create(**request_data, betas=betas)
+        except Exception as e:
+            logger.error(f"Error streaming Anthropic request: {e} with request data: {json.dumps(request_data)}")
+            raise e
 
     @trace_method
     async def send_llm_batch_request_async(
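The pattern in this hunk, repeated in the two clients below, is the same: wrap the stream-creation call, log the full request payload on failure, and re-raise so the caller's error handling is unchanged. A minimal standalone sketch of the pattern, assuming a module-level logger and a JSON-serializable request_data dict (the helper name is hypothetical):

    import json
    import logging

    logger = logging.getLogger(__name__)

    async def create_with_logging(client, request_data: dict, betas: list):
        # Log the failing payload for debugging, then re-raise unchanged.
        try:
            return await client.beta.messages.create(**request_data, betas=betas)
        except Exception as e:
            # json.dumps assumes every value in request_data is JSON-serializable
            logger.error(f"Error streaming Anthropic request: {e} with request data: {json.dumps(request_data)}")
            raise

A bare raise re-raises with the original traceback; the diff's raise e delivers the same exception object to the caller.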
@@ -140,11 +140,16 @@ class GoogleVertexClient(LLMClientBase):
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
         client = self._get_client()
-        response = await client.aio.models.generate_content_stream(
-            model=llm_config.model,
-            contents=request_data["contents"],
-            config=request_data["config"],
-        )
+        try:
+            response = await client.aio.models.generate_content_stream(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+        except Exception as e:
+            logger.error(f"Error streaming Google Vertex request: {e} with request data: {json.dumps(request_data)}")
+            raise e
         # Direct yield - keeps response alive in generator's local scope throughout iteration
         # This is required because the SDK's connection lifecycle is tied to the response object
         async for chunk in response:
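The "direct yield" comment is the load-bearing part of this hunk: the response object owns the SDK's underlying connection, so it must stay referenced in the generator's local scope until iteration finishes. A stripped-down sketch of that generator shape (names are placeholders for the real method's locals):

    from typing import Any, AsyncIterator

    async def stream_chunks(client: Any, model: str, request_data: dict) -> AsyncIterator[Any]:
        # `response` is created and iterated in the same generator frame, so it
        # stays alive (and the connection stays open) for the whole stream.
        response = await client.aio.models.generate_content_stream(
            model=model,
            contents=request_data["contents"],
            config=request_data["config"],
        )
        async for chunk in response:
            yield chunk  # direct yield keeps `response` in scope until exhaustion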
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import os
 import time
 from typing import Any, List, Optional
@@ -762,17 +763,25 @@ class OpenAIClient(LLMClientBase):
 
         # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
         if "input" in request_data and "messages" not in request_data:
-            response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
-                **request_data,
-                stream=True,
-                # stream_options={"include_usage": True},
-            )
+            try:
+                response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
+                    **request_data,
+                    stream=True,
+                    # stream_options={"include_usage": True},
+                )
+            except Exception as e:
+                logger.error(f"Error streaming OpenAI Responses request: {e} with request data: {json.dumps(request_data)}")
+                raise e
         else:
-            response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
-                **request_data,
-                stream=True,
-                stream_options={"include_usage": True},
-            )
+            try:
+                response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+                    **request_data,
+                    stream=True,
+                    stream_options={"include_usage": True},
+                )
+            except Exception as e:
+                logger.error(f"Error streaming OpenAI Chat Completions request: {e} with request data: {json.dumps(request_data)}")
+                raise e
         return response_stream
 
     @trace_method
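The OpenAI hunk routes on payload shape rather than an explicit flag: a Responses API payload is built with an "input" key, a Chat Completions payload with "messages". A hypothetical helper expressing the same predicate:

    def uses_responses_api(request_data: dict) -> bool:
        # Responses API payloads carry 'input'; Chat Completions payloads carry 'messages'.
        # Requiring the absence of 'messages' guards against ambiguous payloads with both.
        return "input" in request_data and "messages" not in request_data

Note that stream_options={"include_usage": True} stays commented out on the Responses path; it is only passed on the Chat Completions path.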