chore: add gemini + vertex to new agent loop (#2230)
This commit is contained in:
@@ -1,422 +1,21 @@
|
||||
import json
|
||||
import uuid
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
from google import genai
|
||||
from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
|
||||
|
||||
from letta.constants import NON_USER_MSG_PREFIX
|
||||
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.helpers.json_helpers import json_dumps
|
||||
from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
|
||||
from letta.llm_api.helpers import make_post_request
|
||||
from letta.llm_api.llm_client_base import LLMClientBase
|
||||
from letta.local_llm.json_parser import clean_json_string_extra_backslash
|
||||
from letta.local_llm.utils import count_tokens
|
||||
from letta.llm_api.google_vertex_client import GoogleVertexClient
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.enums import ProviderCategory
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.schemas.openai.chat_completion_request import Tool
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
|
||||
from letta.settings import model_settings
|
||||
from letta.utils import get_tool_call_id
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GoogleAIClient(LLMClientBase):
|
||||
class GoogleAIClient(GoogleVertexClient):
|
||||
|
||||
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
    """
    Send an already-built request payload to the Gemini REST endpoint.

    Args:
        request_data: Payload to POST, as produced by ``build_request_data``.
        llm_config: Supplies the endpoint base URL, the model name and the
            provider category / name used to resolve the API key.

    Returns:
        Whatever ``make_post_request`` returns for the call.
    """
    # Bring-your-own-key providers may carry their own key; look it up first.
    override_key = None
    if llm_config.provider_category == ProviderCategory.byok:
        from letta.services.provider_manager import ProviderManager

        override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)

    # Fall back to the globally configured key when no override was found.
    resolved_key = override_key or model_settings.gemini_api_key

    # NOTE(review): str() turns a missing (None) key into the literal "None";
    # confirm a key is always configured before this point.
    endpoint_kwargs = dict(
        base_url=str(llm_config.model_endpoint),
        model=llm_config.model,
        api_key=str(resolved_key),
        key_in_header=True,
        generate_content=True,
    )
    url, headers = get_gemini_endpoint_and_headers(**endpoint_kwargs)
    return make_post_request(url, headers, request_data)
|
||||
|
||||
def build_request_data(
    self,
    messages: List[PydanticMessage],
    llm_config: LLMConfig,
    tools: List[dict],
    force_tool_call: Optional[str] = None,
) -> dict:
    """
    Assemble the request payload for a Google AI ``generateContent`` call.

    Args:
        messages: Conversation history; each message is converted with
            ``to_google_ai_dict``.
        llm_config: Supplies temperature, max tokens and the inner-thoughts
            setting used during tool conversion.
        tools: OpenAI-style function schemas. A falsy value is passed through
            unchanged under the ``"tools"`` key.
        force_tool_call: Accepted for interface compatibility; not read here.

    Returns:
        A dict with ``contents``, ``tools``, ``generation_config`` and
        ``tool_config`` entries.
    """
    google_tools = tools
    allowed_names = []
    if tools:
        # Wrap each raw schema in the OpenAI envelope so it validates as a Tool.
        tool_models = [Tool(**{"type": "function", "function": schema}) for schema in tools]
        allowed_names = [model.function.name for model in tool_models]
        # Convert to the exact payload style Google expects
        google_tools = self.convert_tools_to_google_ai_format(tool_models, llm_config)

    google_messages = [m.to_google_ai_dict() for m in messages]
    contents = self.add_dummy_model_messages(google_messages)

    generation_config = {
        "temperature": llm_config.temperature,
        "max_output_tokens": llm_config.max_tokens,
    }
    payload = {
        "contents": contents,
        "tools": google_tools,
        "generation_config": generation_config,
    }

    # ANY mode forces the model to predict only function calls. The allowed
    # names are listed explicitly (an empty list should also work, but it
    # seems not to).
    calling_config = FunctionCallingConfig(
        mode=FunctionCallingConfigMode.ANY,
        allowed_function_names=allowed_names,
    )
    payload["tool_config"] = ToolConfig(function_calling_config=calling_config).model_dump()
    return payload
|
||||
|
||||
def convert_response_to_chat_completion(
    self,
    response_data: dict,
    input_messages: List[PydanticMessage],
    llm_config: LLMConfig,
) -> ChatCompletionResponse:
    """
    Translate a raw Google AI response dict into an OpenAI-style
    ChatCompletionResponse.

    Each (kept) part of each candidate becomes one Choice. When a candidate
    carries several parts only the last one is kept.

    Example Input:
    {
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
              }
            ]
          }
        }
      ],
      "usageMetadata": {
        "promptTokenCount": 9,
        "candidatesTokenCount": 27,
        "totalTokenCount": 36
      }
    }

    Args:
        response_data: Parsed JSON body of the API response.
        input_messages: Original input messages; only used to approximate the
            prompt token count when the response has no ``usageMetadata``.
        llm_config: Supplies the model name and the inner-thoughts setting.

    Raises:
        ValueError: If a candidate has no role, the finish reason is not
            recognized, or ``usageMetadata`` lacks a required counter.
        AssertionError: If the role is not "model" or a function call has an
            unexpected shape.
        KeyError: If an expected key is missing from the response.
    """
    # print("[google_ai response]", json.dumps(response_data, indent=2))

    choices = []
    for candidate in response_data["candidates"]:
        content = candidate["content"]

        if not ("role" in content and content["role"]):
            # This means the response is malformed like MALFORMED_FUNCTION_CALL
            # NOTE: must be a ValueError to trigger a retry
            raise ValueError(f"Error in response data from LLM: {response_data}")
        role = content["role"]
        assert role == "model", f"Unknown role in response: {role}"

        parts = content["parts"]

        # NOTE: multi-part responses are not properly supported here anyway (choices are just appended),
        # so they are disabled for now.

        # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
        # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
        # To patch this, if we have multiple parts we can take the last one
        if len(parts) > 1:
            logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
            parts = [parts[-1]]

        # TODO support parts / multimodal
        # TODO support parallel tool calling natively
        # TODO Alternative here is to throw away everything else except for the first part
        for part in parts:
            raw_call = part["functionCall"] if "functionCall" in part else None

            # Convert the actual message style to OpenAI style
            if raw_call is not None:
                assert isinstance(raw_call, dict), raw_call
                call_name = raw_call["name"]
                assert isinstance(call_name, str), call_name
                call_args = raw_call["args"]
                assert isinstance(call_args, dict), call_args

                # NOTE: this also involves stripping the inner monologue out of the function
                thoughts = None
                if llm_config.put_inner_thoughts_in_kwargs:
                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

                    assert INNER_THOUGHTS_KWARG_VERTEX in call_args, f"Couldn't find inner thoughts in function args:\n{raw_call}"
                    thoughts = call_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                    assert thoughts is not None, f"Expected non-null inner thoughts function arg:\n{raw_call}"

                # Google AI API doesn't generate tool call IDs
                tool_call = ToolCall(
                    id=get_tool_call_id(),
                    type="function",
                    function=FunctionCall(
                        name=call_name,
                        arguments=clean_json_string_extra_backslash(json_dumps(call_args)),
                    ),
                )
                assistant_message = Message(
                    role="assistant",  # NOTE: "model" -> "assistant"
                    content=thoughts,
                    tool_calls=[tool_call],
                )
            else:
                # Inner thoughts are the content by default
                assistant_message = Message(
                    role="assistant",  # NOTE: "model" -> "assistant"
                    content=part["text"],
                )

            # Google AI API uses different finish reason strings than OpenAI
            # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
            #   see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
            # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
            #   see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
            finish_reason = candidate["finishReason"]
            if finish_reason == "STOP":
                has_tool_calls = assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0
                openai_finish_reason = "function_call" if has_tool_calls else "stop"
            elif finish_reason == "MAX_TOKENS":
                openai_finish_reason = "length"
            elif finish_reason in ("SAFETY", "RECITATION"):
                openai_finish_reason = "content_filter"
            else:
                raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")

            # The running choice index is simply the number of choices emitted so far.
            choices.append(
                Choice(
                    finish_reason=openai_finish_reason,
                    index=len(choices),
                    message=assistant_message,
                )
            )

    # if len(choices) > 1:
    #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")

    # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
    #  "usageMetadata": {
    #     "promptTokenCount": 9,
    #     "candidatesTokenCount": 27,
    #     "totalTokenCount": 36
    #   }
    if "usageMetadata" in response_data:
        usage_data = response_data["usageMetadata"]
        for required_key in ("promptTokenCount", "totalTokenCount", "candidatesTokenCount"):
            if required_key not in usage_data:
                raise ValueError(f"{required_key} not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")

        prompt_tokens = usage_data["promptTokenCount"]
        completion_tokens = usage_data["candidatesTokenCount"]
        total_tokens = usage_data["totalTokenCount"]
    else:
        # Count it ourselves
        assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required"
        prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
        # NOTE: also approximate; only the most recently built message is counted
        completion_tokens = count_tokens(json_dumps(assistant_message.model_dump()))
        total_tokens = prompt_tokens + completion_tokens

    usage = UsageStatistics(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=total_tokens,
    )

    response_id = str(uuid.uuid4())
    return ChatCompletionResponse(
        id=response_id,
        choices=choices,
        model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
        created=get_utc_time_int(),
        usage=usage,
    )
|
||||
|
||||
def _clean_google_ai_schema_properties(self, schema_part: dict):
|
||||
"""Recursively clean schema parts to remove unsupported Google AI keywords."""
|
||||
if not isinstance(schema_part, dict):
|
||||
return
|
||||
|
||||
# Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
|
||||
# * Only a subset of the OpenAPI schema is supported.
|
||||
# * Supported parameter types in Python are limited.
|
||||
unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
|
||||
keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
|
||||
for key_to_remove in keys_to_remove_at_this_level:
|
||||
logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
|
||||
del schema_part[key_to_remove]
|
||||
|
||||
if schema_part.get("type") == "string" and "format" in schema_part:
|
||||
allowed_formats = ["enum", "date-time"]
|
||||
if schema_part["format"] not in allowed_formats:
|
||||
logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
|
||||
del schema_part["format"]
|
||||
|
||||
# Check properties within the current level
|
||||
if "properties" in schema_part and isinstance(schema_part["properties"], dict):
|
||||
for prop_name, prop_schema in schema_part["properties"].items():
|
||||
self._clean_google_ai_schema_properties(prop_schema)
|
||||
|
||||
# Check items within arrays
|
||||
if "items" in schema_part and isinstance(schema_part["items"], dict):
|
||||
self._clean_google_ai_schema_properties(schema_part["items"])
|
||||
|
||||
# Check within anyOf, allOf, oneOf lists
|
||||
for key in ["anyOf", "allOf", "oneOf"]:
|
||||
if key in schema_part and isinstance(schema_part[key], list):
|
||||
for item_schema in schema_part[key]:
|
||||
self._clean_google_ai_schema_properties(item_schema)
|
||||
|
||||
def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
    """
    Convert OpenAI-style tool definitions into Google AI function declarations.

    OpenAI style:
      "tools": [{
        "type": "function",
        "function": {
            "name": "find_movies",
            "description": "find ....",
            "parameters": {
              "type": "object",
              "properties": {
                 PARAM: {
                   "type": PARAM_TYPE,  # eg "string"
                   "description": PARAM_DESCRIPTION,
                 },
                 ...
              },
              "required": List[str],
            }
        }
      }
      ]

    Google AI style:
      "tools": [{
        "functionDeclarations": [{
          "name": "find_movies",
          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
          "parameters": {
            "type": "OBJECT",
            "properties": {
              "location": {
                "type": "STRING",
                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
              },
              "description": {
                "type": "STRING",
                "description": "Any kind of description including category or genre, title words, attributes, etc."
              }
            },
            "required": ["description"]
          }
        }, {
          "name": "find_theaters",
          ...

    Args:
        tools: Tool objects whose ``function`` carries name, description and
            a JSON-schema ``parameters`` dict. The schema dicts are cleaned
            (and, if enabled, extended) in place.
        llm_config: When ``put_inner_thoughts_in_kwargs`` is set, a required
            inner-thoughts string parameter is added to every function.

    Returns:
        A single-element list: ``[{"functionDeclarations": [...]}]``.
    """
    function_list = [
        dict(
            name=t.function.name,
            description=t.function.description,
            parameters=t.function.parameters,  # TODO need to unpack
        )
        for t in tools
    ]

    for func in function_list:
        # Note: Google AI API used to have weird casing requirements, but not any more

        # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
        if isinstance(func["parameters"], dict):
            self._clean_google_ai_schema_properties(func["parameters"])

        if not llm_config.put_inner_thoughts_in_kwargs:
            continue

        # Add inner thoughts
        from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX

        # A function may have no parameter schema at all, or one without
        # "properties" / "required"; create the missing containers instead
        # of failing on the lookups below.
        params = func["parameters"]
        if not isinstance(params, dict):
            params = func["parameters"] = {"type": "object"}
        if not isinstance(params.get("properties"), dict):
            params["properties"] = {}
        if not isinstance(params.get("required"), list):
            params["required"] = []

        params["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
            "type": "string",
            "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
        }
        # The schema is edited in place, so converting the same schema again
        # must not list the inner-thoughts argument twice.
        if INNER_THOUGHTS_KWARG_VERTEX not in params["required"]:
            params["required"].append(INNER_THOUGHTS_KWARG_VERTEX)

    return [{"functionDeclarations": function_list}]
|
||||
|
||||
def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
    """Google AI API requires all function call returns are immediately followed by a 'model' role message.

    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
    so there is no natural follow-up 'model' role message.

    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
    with role == 'model' that is placed in between the function output
    (role == 'tool') and the user message (role == 'user').

    Args:
        messages: Google-AI-style message dicts, each with a "role" key.

    Returns:
        A new list holding the original message objects, with a dummy
        'model' message inserted after every tool/function message that is
        directly followed by a user message. The input list is not modified.
    """
    messages_with_padding = []
    last_index = len(messages) - 1
    for i, message in enumerate(messages):
        messages_with_padding.append(message)

        # Pad only when a tool/function result is directly followed by a user turn.
        is_tool_result = message["role"] in ("tool", "function")
        if is_tool_result and i < last_index and messages[i + 1]["role"] == "user":
            # Build a fresh dict for every insertion so the padded entries do
            # not alias one shared object (editing one would change them all).
            messages_with_padding.append(
                {
                    "role": "model",
                    "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
                }
            )

    return messages_with_padding
|
||||
def _get_client(self):
    """Build a google-genai client authenticated with the Gemini API key from model settings."""
    # NOTE(review): only the globally configured key is read here; per-provider
    # override keys are not consulted - confirm that is intended.
    gemini_key = model_settings.gemini_api_key
    return genai.Client(api_key=gemini_key)
|
||||
|
||||
|
||||
def get_gemini_endpoint_and_headers(
|
||||
|
||||
@@ -5,14 +5,16 @@ from typing import List, Optional
|
||||
from google import genai
|
||||
from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
|
||||
|
||||
from letta.constants import NON_USER_MSG_PREFIX
|
||||
from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.helpers.json_helpers import json_dumps, json_loads
|
||||
from letta.llm_api.google_ai_client import GoogleAIClient
|
||||
from letta.llm_api.llm_client_base import LLMClientBase
|
||||
from letta.local_llm.json_parser import clean_json_string_extra_backslash
|
||||
from letta.local_llm.utils import count_tokens
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.schemas.openai.chat_completion_request import Tool
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
|
||||
from letta.settings import model_settings, settings
|
||||
from letta.utils import get_tool_call_id
|
||||
@@ -20,18 +22,21 @@ from letta.utils import get_tool_call_id
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GoogleVertexClient(GoogleAIClient):
|
||||
class GoogleVertexClient(LLMClientBase):
|
||||
|
||||
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
||||
"""
|
||||
Performs underlying request to llm and returns raw response.
|
||||
"""
|
||||
client = genai.Client(
|
||||
def _get_client(self):
    """Build a google-genai client in Vertex AI mode, using the project and location from model settings."""
    vertex_options = dict(
        vertexai=True,
        project=model_settings.google_cloud_project,
        location=model_settings.google_cloud_location,
        # Pin the API version explicitly.
        http_options={"api_version": "v1"},
    )
    return genai.Client(**vertex_options)
|
||||
|
||||
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
||||
"""
|
||||
Performs underlying request to llm and returns raw response.
|
||||
"""
|
||||
client = self._get_client()
|
||||
response = client.models.generate_content(
|
||||
model=llm_config.model,
|
||||
contents=request_data["contents"],
|
||||
@@ -43,12 +48,7 @@ class GoogleVertexClient(GoogleAIClient):
|
||||
"""
|
||||
Performs underlying request to llm and returns raw response.
|
||||
"""
|
||||
client = genai.Client(
|
||||
vertexai=True,
|
||||
project=model_settings.google_cloud_project,
|
||||
location=model_settings.google_cloud_location,
|
||||
http_options={"api_version": "v1"},
|
||||
)
|
||||
client = self._get_client()
|
||||
response = await client.aio.models.generate_content(
|
||||
model=llm_config.model,
|
||||
contents=request_data["contents"],
|
||||
@@ -56,6 +56,139 @@ class GoogleVertexClient(GoogleAIClient):
|
||||
)
|
||||
return response.model_dump()
|
||||
|
||||
def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
    """Google AI API requires all function call returns are immediately followed by a 'model' role message.

    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
    so there is no natural follow-up 'model' role message.

    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
    with role == 'model' that is placed in between the function output
    (role == 'tool') and the user message (role == 'user').

    Args:
        messages: Google-AI-style message dicts, each with a "role" key.

    Returns:
        A new list holding the original message objects, with a dummy
        'model' message inserted after every tool/function message that is
        directly followed by a user message. The input list is not modified.
    """
    messages_with_padding = []
    last_index = len(messages) - 1
    for i, message in enumerate(messages):
        messages_with_padding.append(message)

        # Pad only when a tool/function result is directly followed by a user turn.
        is_tool_result = message["role"] in ("tool", "function")
        if is_tool_result and i < last_index and messages[i + 1]["role"] == "user":
            # Build a fresh dict for every insertion so the padded entries do
            # not alias one shared object (editing one would change them all).
            messages_with_padding.append(
                {
                    "role": "model",
                    "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
                }
            )

    return messages_with_padding
|
||||
|
||||
def _clean_google_ai_schema_properties(self, schema_part: dict):
|
||||
"""Recursively clean schema parts to remove unsupported Google AI keywords."""
|
||||
if not isinstance(schema_part, dict):
|
||||
return
|
||||
|
||||
# Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
|
||||
# * Only a subset of the OpenAPI schema is supported.
|
||||
# * Supported parameter types in Python are limited.
|
||||
unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
|
||||
keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
|
||||
for key_to_remove in keys_to_remove_at_this_level:
|
||||
logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
|
||||
del schema_part[key_to_remove]
|
||||
|
||||
if schema_part.get("type") == "string" and "format" in schema_part:
|
||||
allowed_formats = ["enum", "date-time"]
|
||||
if schema_part["format"] not in allowed_formats:
|
||||
logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
|
||||
del schema_part["format"]
|
||||
|
||||
# Check properties within the current level
|
||||
if "properties" in schema_part and isinstance(schema_part["properties"], dict):
|
||||
for prop_name, prop_schema in schema_part["properties"].items():
|
||||
self._clean_google_ai_schema_properties(prop_schema)
|
||||
|
||||
# Check items within arrays
|
||||
if "items" in schema_part and isinstance(schema_part["items"], dict):
|
||||
self._clean_google_ai_schema_properties(schema_part["items"])
|
||||
|
||||
# Check within anyOf, allOf, oneOf lists
|
||||
for key in ["anyOf", "allOf", "oneOf"]:
|
||||
if key in schema_part and isinstance(schema_part[key], list):
|
||||
for item_schema in schema_part[key]:
|
||||
self._clean_google_ai_schema_properties(item_schema)
|
||||
|
||||
def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
    """
    Convert OpenAI-style tool definitions into Google AI function declarations.

    OpenAI style:
      "tools": [{
        "type": "function",
        "function": {
            "name": "find_movies",
            "description": "find ....",
            "parameters": {
              "type": "object",
              "properties": {
                 PARAM: {
                   "type": PARAM_TYPE,  # eg "string"
                   "description": PARAM_DESCRIPTION,
                 },
                 ...
              },
              "required": List[str],
            }
        }
      }
      ]

    Google AI style:
      "tools": [{
        "functionDeclarations": [{
          "name": "find_movies",
          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
          "parameters": {
            "type": "OBJECT",
            "properties": {
              "location": {
                "type": "STRING",
                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
              },
              "description": {
                "type": "STRING",
                "description": "Any kind of description including category or genre, title words, attributes, etc."
              }
            },
            "required": ["description"]
          }
        }, {
          "name": "find_theaters",
          ...

    Args:
        tools: Tool objects whose ``function`` carries name, description and
            a JSON-schema ``parameters`` dict. The schema dicts are cleaned
            (and, if enabled, extended) in place.
        llm_config: When ``put_inner_thoughts_in_kwargs`` is set, a required
            inner-thoughts string parameter is added to every function.

    Returns:
        A single-element list: ``[{"functionDeclarations": [...]}]``.
    """
    function_list = [
        dict(
            name=t.function.name,
            description=t.function.description,
            parameters=t.function.parameters,  # TODO need to unpack
        )
        for t in tools
    ]

    for func in function_list:
        # Note: Google AI API used to have weird casing requirements, but not any more

        # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
        if isinstance(func["parameters"], dict):
            self._clean_google_ai_schema_properties(func["parameters"])

        if not llm_config.put_inner_thoughts_in_kwargs:
            continue

        # Add inner thoughts
        from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX

        # A function may have no parameter schema at all, or one without
        # "properties" / "required"; create the missing containers instead
        # of failing on the lookups below.
        params = func["parameters"]
        if not isinstance(params, dict):
            params = func["parameters"] = {"type": "object"}
        if not isinstance(params.get("properties"), dict):
            params["properties"] = {}
        if not isinstance(params.get("required"), list):
            params["required"] = []

        params["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
            "type": "string",
            "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
        }
        # The schema is edited in place, so converting the same schema again
        # must not list the inner-thoughts argument twice.
        if INNER_THOUGHTS_KWARG_VERTEX not in params["required"]:
            params["required"].append(INNER_THOUGHTS_KWARG_VERTEX)

    return [{"functionDeclarations": function_list}]
|
||||
|
||||
def build_request_data(
|
||||
self,
|
||||
messages: List[PydanticMessage],
|
||||
@@ -66,7 +199,40 @@ class GoogleVertexClient(GoogleAIClient):
|
||||
"""
|
||||
Constructs a request object in the expected data format for this client.
|
||||
"""
|
||||
request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
|
||||
|
||||
if tools:
|
||||
tool_objs = [Tool(type="function", function=t) for t in tools]
|
||||
tool_names = [t.function.name for t in tool_objs]
|
||||
# Convert to the exact payload style Google expects
|
||||
formatted_tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
|
||||
else:
|
||||
tool_names = []
|
||||
|
||||
contents = self.add_dummy_model_messages(
|
||||
[m.to_google_ai_dict() for m in messages],
|
||||
)
|
||||
|
||||
request_data = {
|
||||
"contents": contents,
|
||||
"tools": formatted_tools,
|
||||
"generation_config": {
|
||||
"temperature": llm_config.temperature,
|
||||
"max_output_tokens": llm_config.max_tokens,
|
||||
},
|
||||
}
|
||||
|
||||
# write tool config
|
||||
tool_config = ToolConfig(
|
||||
function_calling_config=FunctionCallingConfig(
|
||||
# ANY mode forces the model to predict only function calls
|
||||
mode=FunctionCallingConfigMode.ANY,
|
||||
# Provide the list of tools (though empty should also work, it seems not to)
|
||||
allowed_function_names=tool_names,
|
||||
)
|
||||
)
|
||||
request_data["tool_config"] = tool_config.model_dump()
|
||||
|
||||
# request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
|
||||
request_data["config"] = request_data.pop("generation_config")
|
||||
request_data["config"]["tools"] = request_data.pop("tools")
|
||||
|
||||
@@ -89,11 +255,11 @@ class GoogleVertexClient(GoogleAIClient):
|
||||
# Add thinking_config
|
||||
# If enable_reasoner is False, set thinking_budget to 0
|
||||
# Otherwise, use the value from max_reasoning_tokens
|
||||
thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens
|
||||
thinking_config = ThinkingConfig(
|
||||
thinking_budget=thinking_budget,
|
||||
)
|
||||
request_data["config"]["thinking_config"] = thinking_config.model_dump()
|
||||
if llm_config.enable_reasoner:
|
||||
thinking_config = ThinkingConfig(
|
||||
thinking_budget=llm_config.max_reasoning_tokens,
|
||||
)
|
||||
request_data["config"]["thinking_config"] = thinking_config.model_dump()
|
||||
|
||||
return request_data
|
||||
|
||||
|
||||
10
poetry.lock
generated
10
poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@@ -2123,15 +2123,15 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.10.0"
|
||||
version = "1.15.0"
|
||||
description = "GenAI Python SDK"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
markers = "extra == \"google\""
|
||||
files = [
|
||||
{file = "google_genai-1.10.0-py3-none-any.whl", hash = "sha256:41b105a2fcf8a027fc45cc16694cd559b8cd1272eab7345ad58cfa2c353bf34f"},
|
||||
{file = "google_genai-1.10.0.tar.gz", hash = "sha256:f59423e0f155dc66b7792c8a0e6724c75c72dc699d1eb7907d4d0006d4f6186f"},
|
||||
{file = "google_genai-1.15.0-py3-none-any.whl", hash = "sha256:6d7f149cc735038b680722bed495004720514c234e2a445ab2f27967955071dd"},
|
||||
{file = "google_genai-1.15.0.tar.gz", hash = "sha256:118bb26960d6343cd64f1aeb5c2b02144a36ad06716d0d1eb1fa3e0904db51f1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -7570,4 +7570,4 @@ tests = ["wikipedia"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "<3.14,>=3.10"
|
||||
content-hash = "19eee9b3cd3d270cb748183bc332dd69706bb0bd3150c62e73e61ed437a40c78"
|
||||
content-hash = "e73bf0ff3ec8b6b839d69f2a6e51228fb61a20030e3b334e74e259361ca8ab43"
|
||||
|
||||
@@ -79,7 +79,7 @@ opentelemetry-api = "1.30.0"
|
||||
opentelemetry-sdk = "1.30.0"
|
||||
opentelemetry-instrumentation-requests = "0.51b0"
|
||||
opentelemetry-exporter-otlp = "1.30.0"
|
||||
google-genai = {version = "^1.1.0", optional = true}
|
||||
google-genai = {version = "^1.15.0", optional = true}
|
||||
faker = "^36.1.0"
|
||||
colorama = "^0.4.6"
|
||||
marshmallow-sqlalchemy = "^1.4.1"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"model": "gemini-2.5-pro-exp-03-25",
|
||||
"model": "gemini-2.5-pro-preview-05-06",
|
||||
"model_endpoint_type": "google_vertex",
|
||||
"model_endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/memgpt-428419/locations/us-central1",
|
||||
"context_window": 1048576,
|
||||
|
||||
Reference in New Issue
Block a user