diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py index f056a64b..c0343c65 100644 --- a/letta/llm_api/google_ai_client.py +++ b/letta/llm_api/google_ai_client.py @@ -1,422 +1,21 @@ -import json -import uuid from typing import List, Optional, Tuple import requests from google import genai -from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig -from letta.constants import NON_USER_MSG_PREFIX from letta.errors import ErrorCode, LLMAuthenticationError, LLMError -from letta.helpers.datetime_helpers import get_utc_time_int -from letta.helpers.json_helpers import json_dumps from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK -from letta.llm_api.helpers import make_post_request -from letta.llm_api.llm_client_base import LLMClientBase -from letta.local_llm.json_parser import clean_json_string_extra_backslash -from letta.local_llm.utils import count_tokens +from letta.llm_api.google_vertex_client import GoogleVertexClient from letta.log import get_logger -from letta.schemas.enums import ProviderCategory -from letta.schemas.llm_config import LLMConfig -from letta.schemas.message import Message as PydanticMessage -from letta.schemas.openai.chat_completion_request import Tool -from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics from letta.settings import model_settings -from letta.utils import get_tool_call_id logger = get_logger(__name__) -class GoogleAIClient(LLMClientBase): +class GoogleAIClient(GoogleVertexClient): - def request(self, request_data: dict, llm_config: LLMConfig) -> dict: - """ - Performs underlying request to llm and returns raw response. 
- """ - api_key = None - if llm_config.provider_category == ProviderCategory.byok: - from letta.services.provider_manager import ProviderManager - - api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor) - - if not api_key: - api_key = model_settings.gemini_api_key - - # print("[google_ai request]", json.dumps(request_data, indent=2)) - url, headers = get_gemini_endpoint_and_headers( - base_url=str(llm_config.model_endpoint), - model=llm_config.model, - api_key=str(api_key), - key_in_header=True, - generate_content=True, - ) - return make_post_request(url, headers, request_data) - - def build_request_data( - self, - messages: List[PydanticMessage], - llm_config: LLMConfig, - tools: List[dict], - force_tool_call: Optional[str] = None, - ) -> dict: - """ - Constructs a request object in the expected data format for this client. - """ - if tools: - tools = [{"type": "function", "function": f} for f in tools] - tool_objs = [Tool(**t) for t in tools] - tool_names = [t.function.name for t in tool_objs] - # Convert to the exact payload style Google expects - tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config) - else: - tool_names = [] - - contents = self.add_dummy_model_messages( - [m.to_google_ai_dict() for m in messages], - ) - - request_data = { - "contents": contents, - "tools": tools, - "generation_config": { - "temperature": llm_config.temperature, - "max_output_tokens": llm_config.max_tokens, - }, - } - - # write tool config - tool_config = ToolConfig( - function_calling_config=FunctionCallingConfig( - # ANY mode forces the model to predict only function calls - mode=FunctionCallingConfigMode.ANY, - # Provide the list of tools (though empty should also work, it seems not to) - allowed_function_names=tool_names, - ) - ) - request_data["tool_config"] = tool_config.model_dump() - return request_data - - def convert_response_to_chat_completion( - self, - response_data: dict, - input_messages: List[PydanticMessage], - 
llm_config: LLMConfig, - ) -> ChatCompletionResponse: - """ - Converts custom response format from llm client into an OpenAI - ChatCompletionsResponse object. - - Example Input: - { - "candidates": [ - { - "content": { - "parts": [ - { - "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14." - } - ] - } - } - ], - "usageMetadata": { - "promptTokenCount": 9, - "candidatesTokenCount": 27, - "totalTokenCount": 36 - } - } - """ - # print("[google_ai response]", json.dumps(response_data, indent=2)) - - try: - choices = [] - index = 0 - for candidate in response_data["candidates"]: - content = candidate["content"] - - if "role" not in content or not content["role"]: - # This means the response is malformed like MALFORMED_FUNCTION_CALL - # NOTE: must be a ValueError to trigger a retry - raise ValueError(f"Error in response data from LLM: {response_data}") - role = content["role"] - assert role == "model", f"Unknown role in response: {role}" - - parts = content["parts"] - - # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices), - # so let's disable it for now - - # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text - # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. 
Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'} - # To patch this, if we have multiple parts we can take the last one - if len(parts) > 1: - logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}") - parts = [parts[-1]] - - # TODO support parts / multimodal - # TODO support parallel tool calling natively - # TODO Alternative here is to throw away everything else except for the first part - for response_message in parts: - # Convert the actual message style to OpenAI style - if "functionCall" in response_message and response_message["functionCall"] is not None: - function_call = response_message["functionCall"] - assert isinstance(function_call, dict), function_call - function_name = function_call["name"] - assert isinstance(function_name, str), function_name - function_args = function_call["args"] - assert isinstance(function_args, dict), function_args - - # NOTE: this also involves stripping the inner monologue out of the function - if llm_config.put_inner_thoughts_in_kwargs: - from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX - - assert ( - INNER_THOUGHTS_KWARG_VERTEX in function_args - ), f"Couldn't find inner thoughts in function args:\n{function_call}" - inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX) - assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}" - else: - inner_thoughts = None - - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - tool_calls=[ - ToolCall( - id=get_tool_call_id(), - type="function", - 
function=FunctionCall( - name=function_name, - arguments=clean_json_string_extra_backslash(json_dumps(function_args)), - ), - ) - ], - ) - - else: - - # Inner thoughts are the content by default - inner_thoughts = response_message["text"] - - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - ) - - # Google AI API uses different finish reason strings than OpenAI - # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null - # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api - # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER - # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason - finish_reason = candidate["finishReason"] - if finish_reason == "STOP": - openai_finish_reason = ( - "function_call" - if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0 - else "stop" - ) - elif finish_reason == "MAX_TOKENS": - openai_finish_reason = "length" - elif finish_reason == "SAFETY": - openai_finish_reason = "content_filter" - elif finish_reason == "RECITATION": - openai_finish_reason = "content_filter" - else: - raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}") - - choices.append( - Choice( - finish_reason=openai_finish_reason, - index=index, - message=openai_response_message, - ) - ) - index += 1 - - # if len(choices) > 1: - # raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})") - - # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist? 
- # "usageMetadata": { - # "promptTokenCount": 9, - # "candidatesTokenCount": 27, - # "totalTokenCount": 36 - # } - if "usageMetadata" in response_data: - usage_data = response_data["usageMetadata"] - if "promptTokenCount" not in usage_data: - raise ValueError(f"promptTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}") - if "totalTokenCount" not in usage_data: - raise ValueError(f"totalTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}") - if "candidatesTokenCount" not in usage_data: - raise ValueError(f"candidatesTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}") - - prompt_tokens = usage_data["promptTokenCount"] - completion_tokens = usage_data["candidatesTokenCount"] - total_tokens = usage_data["totalTokenCount"] - - usage = UsageStatistics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - ) - else: - # Count it ourselves - assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required" - prompt_tokens = count_tokens(json_dumps(input_messages)) # NOTE: this is a very rough approximation - completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump())) # NOTE: this is also approximate - total_tokens = prompt_tokens + completion_tokens - usage = UsageStatistics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - ) - - response_id = str(uuid.uuid4()) - return ChatCompletionResponse( - id=response_id, - choices=choices, - model=llm_config.model, # NOTE: Google API doesn't pass back model in the response - created=get_utc_time_int(), - usage=usage, - ) - except KeyError as e: - raise e - - def _clean_google_ai_schema_properties(self, schema_part: dict): - """Recursively clean schema parts to remove unsupported Google AI keywords.""" - if not isinstance(schema_part, dict): - return - - # Per 
https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations - # * Only a subset of the OpenAPI schema is supported. - # * Supported parameter types in Python are limited. - unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"] - keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part] - for key_to_remove in keys_to_remove_at_this_level: - logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.") - del schema_part[key_to_remove] - - if schema_part.get("type") == "string" and "format" in schema_part: - allowed_formats = ["enum", "date-time"] - if schema_part["format"] not in allowed_formats: - logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}") - del schema_part["format"] - - # Check properties within the current level - if "properties" in schema_part and isinstance(schema_part["properties"], dict): - for prop_name, prop_schema in schema_part["properties"].items(): - self._clean_google_ai_schema_properties(prop_schema) - - # Check items within arrays - if "items" in schema_part and isinstance(schema_part["items"], dict): - self._clean_google_ai_schema_properties(schema_part["items"]) - - # Check within anyOf, allOf, oneOf lists - for key in ["anyOf", "allOf", "oneOf"]: - if key in schema_part and isinstance(schema_part[key], list): - for item_schema in schema_part[key]: - self._clean_google_ai_schema_properties(item_schema) - - def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]: - """ - OpenAI style: - "tools": [{ - "type": "function", - "function": { - "name": "find_movies", - "description": "find ....", - "parameters": { - "type": "object", - "properties": { - PARAM: { - "type": PARAM_TYPE, # eg "string" - "description": PARAM_DESCRIPTION, - }, - ... 
- }, - "required": List[str], - } - } - } - ] - - Google AI style: - "tools": [{ - "functionDeclarations": [{ - "name": "find_movies", - "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.", - "parameters": { - "type": "OBJECT", - "properties": { - "location": { - "type": "STRING", - "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616" - }, - "description": { - "type": "STRING", - "description": "Any kind of description including category or genre, title words, attributes, etc." - } - }, - "required": ["description"] - } - }, { - "name": "find_theaters", - ... - """ - function_list = [ - dict( - name=t.function.name, - description=t.function.description, - parameters=t.function.parameters, # TODO need to unpack - ) - for t in tools - ] - - # Add inner thoughts if needed - for func in function_list: - # Note: Google AI API used to have weird casing requirements, but not any more - - # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned - if "parameters" in func and isinstance(func["parameters"], dict): - self._clean_google_ai_schema_properties(func["parameters"]) - - # Add inner thoughts - if llm_config.put_inner_thoughts_in_kwargs: - from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX - - func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = { - "type": "string", - "description": INNER_THOUGHTS_KWARG_DESCRIPTION, - } - func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX) - - return [{"functionDeclarations": function_list}] - - def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]: - """Google AI API requires all function call returns are immediately followed by a 'model' role message. - - In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user, - so there is no natural follow-up 'model' role message. 
- - To satisfy the Google AI API restrictions, we can add a dummy 'yield' message - with role == 'model' that is placed in-betweeen and function output - (role == 'tool') and user message (role == 'user'). - """ - dummy_yield_message = { - "role": "model", - "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}], - } - messages_with_padding = [] - for i, message in enumerate(messages): - messages_with_padding.append(message) - # Check if the current message role is 'tool' and the next message role is 'user' - if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"): - messages_with_padding.append(dummy_yield_message) - - return messages_with_padding + def _get_client(self): + return genai.Client(api_key=model_settings.gemini_api_key) def get_gemini_endpoint_and_headers( diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 7319f7fc..bef2ef4f 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -5,14 +5,16 @@ from typing import List, Optional from google import genai from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig +from letta.constants import NON_USER_MSG_PREFIX from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.json_helpers import json_dumps, json_loads -from letta.llm_api.google_ai_client import GoogleAIClient +from letta.llm_api.llm_client_base import LLMClientBase from letta.local_llm.json_parser import clean_json_string_extra_backslash from letta.local_llm.utils import count_tokens from letta.log import get_logger from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage +from letta.schemas.openai.chat_completion_request import Tool from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, 
FunctionCall, Message, ToolCall, UsageStatistics from letta.settings import model_settings, settings from letta.utils import get_tool_call_id @@ -20,18 +22,21 @@ from letta.utils import get_tool_call_id logger = get_logger(__name__) -class GoogleVertexClient(GoogleAIClient): +class GoogleVertexClient(LLMClientBase): - def request(self, request_data: dict, llm_config: LLMConfig) -> dict: - """ - Performs underlying request to llm and returns raw response. - """ - client = genai.Client( + def _get_client(self): + return genai.Client( vertexai=True, project=model_settings.google_cloud_project, location=model_settings.google_cloud_location, http_options={"api_version": "v1"}, ) + + def request(self, request_data: dict, llm_config: LLMConfig) -> dict: + """ + Performs underlying request to llm and returns raw response. + """ + client = self._get_client() response = client.models.generate_content( model=llm_config.model, contents=request_data["contents"], @@ -43,12 +48,7 @@ class GoogleVertexClient(GoogleAIClient): """ Performs underlying request to llm and returns raw response. """ - client = genai.Client( - vertexai=True, - project=model_settings.google_cloud_project, - location=model_settings.google_cloud_location, - http_options={"api_version": "v1"}, - ) + client = self._get_client() response = await client.aio.models.generate_content( model=llm_config.model, contents=request_data["contents"], @@ -56,6 +56,139 @@ class GoogleVertexClient(GoogleAIClient): ) return response.model_dump() + def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]: + """Google AI API requires all function call returns are immediately followed by a 'model' role message. + + In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user, + so there is no natural follow-up 'model' role message. 
+ + To satisfy the Google AI API restrictions, we can add a dummy 'yield' message + with role == 'model' that is placed in-betweeen and function output + (role == 'tool') and user message (role == 'user'). + """ + dummy_yield_message = { + "role": "model", + "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}], + } + messages_with_padding = [] + for i, message in enumerate(messages): + messages_with_padding.append(message) + # Check if the current message role is 'tool' and the next message role is 'user' + if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"): + messages_with_padding.append(dummy_yield_message) + + return messages_with_padding + + def _clean_google_ai_schema_properties(self, schema_part: dict): + """Recursively clean schema parts to remove unsupported Google AI keywords.""" + if not isinstance(schema_part, dict): + return + + # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations + # * Only a subset of the OpenAPI schema is supported. + # * Supported parameter types in Python are limited. + unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"] + keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part] + for key_to_remove in keys_to_remove_at_this_level: + logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.") + del schema_part[key_to_remove] + + if schema_part.get("type") == "string" and "format" in schema_part: + allowed_formats = ["enum", "date-time"] + if schema_part["format"] not in allowed_formats: + logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. 
Allowed: {allowed_formats}") + del schema_part["format"] + + # Check properties within the current level + if "properties" in schema_part and isinstance(schema_part["properties"], dict): + for prop_name, prop_schema in schema_part["properties"].items(): + self._clean_google_ai_schema_properties(prop_schema) + + # Check items within arrays + if "items" in schema_part and isinstance(schema_part["items"], dict): + self._clean_google_ai_schema_properties(schema_part["items"]) + + # Check within anyOf, allOf, oneOf lists + for key in ["anyOf", "allOf", "oneOf"]: + if key in schema_part and isinstance(schema_part[key], list): + for item_schema in schema_part[key]: + self._clean_google_ai_schema_properties(item_schema) + + def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]: + """ + OpenAI style: + "tools": [{ + "type": "function", + "function": { + "name": "find_movies", + "description": "find ....", + "parameters": { + "type": "object", + "properties": { + PARAM: { + "type": PARAM_TYPE, # eg "string" + "description": PARAM_DESCRIPTION, + }, + ... + }, + "required": List[str], + } + } + } + ] + + Google AI style: + "tools": [{ + "functionDeclarations": [{ + "name": "find_movies", + "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.", + "parameters": { + "type": "OBJECT", + "properties": { + "location": { + "type": "STRING", + "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616" + }, + "description": { + "type": "STRING", + "description": "Any kind of description including category or genre, title words, attributes, etc." + } + }, + "required": ["description"] + } + }, { + "name": "find_theaters", + ... 
+ """ + function_list = [ + dict( + name=t.function.name, + description=t.function.description, + parameters=t.function.parameters, # TODO need to unpack + ) + for t in tools + ] + + # Add inner thoughts if needed + for func in function_list: + # Note: Google AI API used to have weird casing requirements, but not any more + + # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned + if "parameters" in func and isinstance(func["parameters"], dict): + self._clean_google_ai_schema_properties(func["parameters"]) + + # Add inner thoughts + if llm_config.put_inner_thoughts_in_kwargs: + from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX + + func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = { + "type": "string", + "description": INNER_THOUGHTS_KWARG_DESCRIPTION, + } + func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX) + + return [{"functionDeclarations": function_list}] + def build_request_data( self, messages: List[PydanticMessage], @@ -66,7 +199,42 @@ class GoogleVertexClient(GoogleAIClient): """ Constructs a request object in the expected data format for this client.
""" - request_data = super().build_request_data(messages, llm_config, tools, force_tool_call) + + if tools: + tool_objs = [Tool(type="function", function=t) for t in tools] + tool_names = [t.function.name for t in tool_objs] + # Convert to the exact payload style Google expects + formatted_tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config) + else: + # No tools supplied: bind both names so the request payload below can still be built + tool_names = [] + formatted_tools = [] + + contents = self.add_dummy_model_messages( + [m.to_google_ai_dict() for m in messages], + ) + + request_data = { + "contents": contents, + "tools": formatted_tools, + "generation_config": { + "temperature": llm_config.temperature, + "max_output_tokens": llm_config.max_tokens, + }, + } + + # write tool config + tool_config = ToolConfig( + function_calling_config=FunctionCallingConfig( + # ANY mode forces the model to predict only function calls + mode=FunctionCallingConfigMode.ANY, + # Provide the list of tools (though empty should also work, it seems not to) + allowed_function_names=tool_names, + ) + ) + request_data["tool_config"] = tool_config.model_dump() + + # request_data = super().build_request_data(messages, llm_config, tools, force_tool_call) request_data["config"] = request_data.pop("generation_config") request_data["config"]["tools"] = request_data.pop("tools") @@ -89,11 +257,11 @@ class GoogleVertexClient(GoogleAIClient): # Add thinking_config # If enable_reasoner is False, set thinking_budget to 0 # Otherwise, use the value from max_reasoning_tokens - thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens - thinking_config = ThinkingConfig( - thinking_budget=thinking_budget, - ) - request_data["config"]["thinking_config"] = thinking_config.model_dump() + if llm_config.enable_reasoner: + thinking_config = ThinkingConfig( + thinking_budget=llm_config.max_reasoning_tokens, + ) + request_data["config"]["thinking_config"] = thinking_config.model_dump() return request_data diff --git a/poetry.lock b/poetry.lock index 6d001a9c..d0976b47
100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -2123,15 +2123,15 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-genai" -version = "1.10.0" +version = "1.15.0" description = "GenAI Python SDK" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"google\"" files = [ - {file = "google_genai-1.10.0-py3-none-any.whl", hash = "sha256:41b105a2fcf8a027fc45cc16694cd559b8cd1272eab7345ad58cfa2c353bf34f"}, - {file = "google_genai-1.10.0.tar.gz", hash = "sha256:f59423e0f155dc66b7792c8a0e6724c75c72dc699d1eb7907d4d0006d4f6186f"}, + {file = "google_genai-1.15.0-py3-none-any.whl", hash = "sha256:6d7f149cc735038b680722bed495004720514c234e2a445ab2f27967955071dd"}, + {file = "google_genai-1.15.0.tar.gz", hash = "sha256:118bb26960d6343cd64f1aeb5c2b02144a36ad06716d0d1eb1fa3e0904db51f1"}, ] [package.dependencies] @@ -7570,4 +7570,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.1" python-versions = "<3.14,>=3.10" -content-hash = "19eee9b3cd3d270cb748183bc332dd69706bb0bd3150c62e73e61ed437a40c78" +content-hash = "e73bf0ff3ec8b6b839d69f2a6e51228fb61a20030e3b334e74e259361ca8ab43" diff --git a/pyproject.toml b/pyproject.toml index 0abde6b5..9cc8b155 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ opentelemetry-api = "1.30.0" opentelemetry-sdk = "1.30.0" opentelemetry-instrumentation-requests = "0.51b0" opentelemetry-exporter-otlp = "1.30.0" -google-genai = {version = "^1.1.0", optional = true} +google-genai = {version = "^1.15.0", optional = true} faker = "^36.1.0" colorama = "^0.4.6" marshmallow-sqlalchemy = "^1.4.1" diff --git a/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json b/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json index 0cf5d3b0..9967e64f 
100644 --- a/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json +++ b/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json @@ -1,5 +1,5 @@ { - "model": "gemini-2.5-pro-exp-03-25", + "model": "gemini-2.5-pro-preview-05-06", "model_endpoint_type": "google_vertex", "model_endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/memgpt-428419/locations/us-central1", "context_window": 1048576,