diff --git a/letta/llm_api/google_ai.py b/letta/llm_api/google_ai.py
deleted file mode 100644
index abf707d0..00000000
--- a/letta/llm_api/google_ai.py
+++ /dev/null
@@ -1,438 +0,0 @@
-import uuid
-from typing import List, Optional, Tuple
-
-import requests
-
-from letta.constants import NON_USER_MSG_PREFIX
-from letta.helpers.datetime_helpers import get_utc_time
-from letta.helpers.json_helpers import json_dumps
-from letta.llm_api.helpers import make_post_request
-from letta.local_llm.json_parser import clean_json_string_extra_backslash
-from letta.local_llm.utils import count_tokens
-from letta.schemas.openai.chat_completion_request import Tool
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
-from letta.tracing import log_event
-from letta.utils import get_tool_call_id
-
-
-def get_gemini_endpoint_and_headers(
-    base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
-) -> Tuple[str, dict]:
-    """
-    Dynamically generate the model endpoint and headers.
-    """
-    url = f"{base_url}/v1beta/models"
-
-    # Add the model
-    if model is not None:
-        url += f"/{model}"
-
-    # Add extension for generating content if we're hitting the LM
-    if generate_content:
-        url += ":generateContent"
-
-    # Decide if api key should be in header or not
-    # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-    if key_in_header:
-        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
-    else:
-        url += f"?key={api_key}"
-        headers = {"Content-Type": "application/json"}
-
-    return url, headers
-
-
-def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
-    from letta.utils import printd
-
-    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
-
-    try:
-        response = requests.get(url, headers=headers)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-
-        # Grab the models out
-        return response
-
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
-        raise http_err
-
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
-def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
-    model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
-    # TODO should this be:
-    # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
-    return int(model_details["inputTokenLimit"])
-
-
-def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
-    from letta.utils import printd
-
-    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
-
-    try:
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-
-        # Grab the models out
-        model_list = response["models"]
-        return model_list
-
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
-        raise http_err
-
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
-def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
-    """Google AI API requires that all function call returns are immediately followed by a 'model' role message.
-
-    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
-    so there is no natural follow-up 'model' role message.
-
-    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
-    with role == 'model' that is placed in between the function output
-    (role == 'tool') and the user message (role == 'user').
-    """
-    dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]}
-    messages_with_padding = []
-    for i, message in enumerate(messages):
-        messages_with_padding.append(message)
-        # Check if the current message role is 'tool' and the next message role is 'user'
-        if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
-            messages_with_padding.append(dummy_yield_message)
-
-    return messages_with_padding
-
-
-# TODO use pydantic model as input
-def to_google_ai(openai_message_dict: dict) -> dict:
-
-    # TODO support "parts" as part of multimodal support
-    assert not isinstance(openai_message_dict["content"], list), "Multi-part message content is not yet supported"
-    if openai_message_dict["role"] == "user":
-        google_ai_message_dict = {
-            "role": "user",
-            "parts": [{"text": openai_message_dict["content"]}],
-        }
-    elif openai_message_dict["role"] == "assistant":
-        google_ai_message_dict = {
-            "role": "model",  # NOTE: diff
-            "parts": [{"text": openai_message_dict["content"]}],
-        }
-    elif openai_message_dict["role"] == "tool":
-        google_ai_message_dict = {
-            "role": "function",  # NOTE: diff
-            "parts": [{"text": openai_message_dict["content"]}],
-        }
-    else:
-        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
-
-
-# TODO convert return type to pydantic
-def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
-    """
-    OpenAI style:
-      "tools": [{
-        "type": "function",
-        "function": {
-            "name": "find_movies",
-            "description": "find ....",
-            "parameters": {
-              "type": "object",
-              "properties": {
-                 PARAM: {
-                   "type": PARAM_TYPE,  # eg "string"
-                   "description": PARAM_DESCRIPTION,
-                 },
-                 ...
-              },
-              "required": List[str],
-            }
-        }
-      }
-    ]
-
-    Google AI style:
-      "tools": [{
-        "functionDeclarations": [{
-          "name": "find_movies",
-          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
-          "parameters": {
-            "type": "OBJECT",
-            "properties": {
-              "location": {
-                "type": "STRING",
-                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
-              },
-              "description": {
-                "type": "STRING",
-                "description": "Any kind of description including category or genre, title words, attributes, etc."
-              }
-            },
-            "required": ["description"]
-          }
-        }, {
-          "name": "find_theaters",
-          ...
-    """
-    function_list = [
-        dict(
-            name=t.function.name,
-            description=t.function.description,
-            parameters=t.function.parameters,  # TODO need to unpack
-        )
-        for t in tools
-    ]
-
-    # Correct casing + add inner thoughts if needed
-    for func in function_list:
-        func["parameters"]["type"] = "OBJECT"
-        for param_name, param_fields in func["parameters"]["properties"].items():
-            param_fields["type"] = param_fields["type"].upper()
-        # Add inner thoughts
-        if inner_thoughts_in_kwargs:
-            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
-
-            func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
-                "type": "STRING",
-                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
-            }
-            func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
-
-    return [{"functionDeclarations": function_list}]
-
-
-def convert_google_ai_response_to_chatcompletion(
-    response_json: dict,  # REST response from Google AI API
-    model: str,  # Required since not returned
-    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
-    pull_inner_thoughts_from_args: Optional[bool] = True,
-) -> ChatCompletionResponse:
-    """Google AI API response format is not the same as ChatCompletion, requires unpacking
-
-    Example:
-    {
-      "candidates": [
-        {
-          "content": {
-            "parts": [
-              {
-                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
-              }
-            ]
-          }
-        }
-      ],
-      "usageMetadata": {
-        "promptTokenCount": 9,
-        "candidatesTokenCount": 27,
-        "totalTokenCount": 36
-      }
-    }
-    """
-    try:
-        choices = []
-        index = 0
-        for candidate in response_json["candidates"]:
-            content = candidate["content"]
-
-            role = content["role"]
-            assert role == "model", f"Unknown role in response: {role}"
-
-            parts = content["parts"]
-            # TODO support parts / multimodal
-            # TODO support parallel tool calling natively
-            # TODO Alternative here is to throw away everything else except for the first part
-            for response_message in parts:
-                # Convert the actual message style to OpenAI style
-                if "functionCall" in response_message and response_message["functionCall"] is not None:
-                    function_call = response_message["functionCall"]
-                    assert isinstance(function_call, dict), function_call
-                    function_name = function_call["name"]
-                    assert isinstance(function_name, str), function_name
-                    function_args = function_call["args"]
-                    assert isinstance(function_args, dict), function_args
-
-                    # NOTE: this also involves stripping the inner monologue out of the function
-                    if pull_inner_thoughts_from_args:
-                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
-
-                        assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
-                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
-                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
-                    else:
-                        inner_thoughts = None
-
-                    # Google AI API doesn't generate tool call IDs
-                    openai_response_message = Message(
-                        role="assistant",  # NOTE: "model" -> "assistant"
-                        content=inner_thoughts,
-                        tool_calls=[
-                            ToolCall(
-                                id=get_tool_call_id(),
-                                type="function",
-                                function=FunctionCall(
-                                    name=function_name,
-                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                                ),
-                            )
-                        ],
-                    )
-
-                else:
-
-                    # Inner thoughts are the content by default
-                    inner_thoughts = response_message["text"]
-
-                    # Google AI API doesn't generate tool call IDs
-                    openai_response_message = Message(
-                        role="assistant",  # NOTE: "model" -> "assistant"
-                        content=inner_thoughts,
-                    )
-
-                # Google AI API uses different finish reason strings than OpenAI
-                # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
-                # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
-                # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
-                # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
-                finish_reason = candidate["finishReason"]
-                if finish_reason == "STOP":
-                    openai_finish_reason = (
-                        "function_call"
-                        if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
-                        else "stop"
-                    )
-                elif finish_reason == "MAX_TOKENS":
-                    openai_finish_reason = "length"
-                elif finish_reason == "SAFETY":
-                    openai_finish_reason = "content_filter"
-                elif finish_reason == "RECITATION":
-                    openai_finish_reason = "content_filter"
-                else:
-                    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
-
-                choices.append(
-                    Choice(
-                        finish_reason=openai_finish_reason,
-                        index=index,
-                        message=openai_response_message,
-                    )
-                )
-                index += 1
-
-        # if len(choices) > 1:
-        #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
-
-        # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
-        # "usageMetadata": {
-        #     "promptTokenCount": 9,
-        #     "candidatesTokenCount": 27,
-        #     "totalTokenCount": 36
-        # }
-        if "usageMetadata" in response_json:
-            usage = UsageStatistics(
-                prompt_tokens=response_json["usageMetadata"]["promptTokenCount"],
-                completion_tokens=response_json["usageMetadata"]["candidatesTokenCount"],
-                total_tokens=response_json["usageMetadata"]["totalTokenCount"],
-            )
-        else:
-            # Count it ourselves
-            assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
-            prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
-            completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
-            total_tokens = prompt_tokens + completion_tokens
-            usage = UsageStatistics(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=total_tokens,
-            )
-
-        response_id = str(uuid.uuid4())
-        return ChatCompletionResponse(
-            id=response_id,
-            choices=choices,
-            model=model,  # NOTE: Google API doesn't pass back model in the response
-            created=get_utc_time(),
-            usage=usage,
-        )
-    except KeyError as e:
-        raise e
-
-
-# TODO convert 'data' type to pydantic
-def google_ai_chat_completions_request(
-    base_url: str,
-    model: str,
-    api_key: str,
-    data: dict,
-    key_in_header: bool = True,
-    add_postfunc_model_messages: bool = True,
-    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
-    # so there's no clean way to put inner thoughts in the same message as a function call
-    inner_thoughts_in_kwargs: bool = True,
-) -> ChatCompletionResponse:
-    """https://ai.google.dev/docs/function_calling
-
-    From https://ai.google.dev/api/rest#service-endpoint:
-    "A service endpoint is a base URL that specifies the network address of an API service.
-    One service might have multiple service endpoints.
-    This service has the following service endpoint and all URIs below are relative to this service endpoint:
-    https://xxx.googleapis.com
-    """
-
-    assert api_key is not None, "Missing api_key when calling Google AI"
-
-    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header, generate_content=True)
-
-    # data["contents"][-1]["role"] = "model"
-    if add_postfunc_model_messages:
-        data["contents"] = add_dummy_model_messages(data["contents"])
-
-    log_event(name="llm_request_sent", attributes=data)
-    response_json = make_post_request(url, headers, data)
-    log_event(name="llm_response_received", attributes=response_json)
-    try:
-        return convert_google_ai_response_to_chatcompletion(
-            response_json=response_json,
-            model=data.get("model"),
-            input_messages=data["contents"],
-            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
-        )
-    except Exception as conversion_error:
-        print(f"Error during response conversion: {conversion_error}")
-        raise conversion_error
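Editor's note: the module deleted above built Gemini REST URLs by hand instead of going through a client SDK. As a quick illustration of what `get_gemini_endpoint_and_headers` produced (the base URL and model name below are placeholders I've assumed for illustration, not values taken from this diff):

```python
# Hypothetical call to the helper deleted above. With key_in_header=True the API
# key travels in the "x-goog-api-key" header; otherwise it is appended to the
# URL as a "?key=..." query parameter.
url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",  # assumed Gemini base URL
    model="gemini-1.5-pro",  # placeholder model name
    api_key="GEMINI_API_KEY",
    key_in_header=True,
    generate_content=True,
)
# url     == "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent"
# headers == {"Content-Type": "application/json", "x-goog-api-key": "GEMINI_API_KEY"}
```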
- """ - dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]} - messages_with_padding = [] - for i, message in enumerate(messages): - messages_with_padding.append(message) - # Check if the current message role is 'tool' and the next message role is 'user' - if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"): - messages_with_padding.append(dummy_yield_message) - - return messages_with_padding - - -# TODO use pydantic model as input -def to_google_ai(openai_message_dict: dict) -> dict: - - # TODO supports "parts" as part of multimodal support - assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported" - if openai_message_dict["role"] == "user": - google_ai_message_dict = { - "role": "user", - "parts": [{"text": openai_message_dict["content"]}], - } - elif openai_message_dict["role"] == "assistant": - google_ai_message_dict = { - "role": "model", # NOTE: diff - "parts": [{"text": openai_message_dict["content"]}], - } - elif openai_message_dict["role"] == "tool": - google_ai_message_dict = { - "role": "function", # NOTE: diff - "parts": [{"text": openai_message_dict["content"]}], - } - else: - raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}") - - -# TODO convert return type to pydantic -def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]: - """ - OpenAI style: - "tools": [{ - "type": "function", - "function": { - "name": "find_movies", - "description": "find ....", - "parameters": { - "type": "object", - "properties": { - PARAM: { - "type": PARAM_TYPE, # eg "string" - "description": PARAM_DESCRIPTION, - }, - ... - }, - "required": List[str], - } - } - } - ] - - Google AI style: - "tools": [{ - "functionDeclarations": [{ - "name": "find_movies", - "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.", - "parameters": { - "type": "OBJECT", - "properties": { - "location": { - "type": "STRING", - "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616" - }, - "description": { - "type": "STRING", - "description": "Any kind of description including category or genre, title words, attributes, etc." - } - }, - "required": ["description"] - } - }, { - "name": "find_theaters", - ... 
- """ - function_list = [ - dict( - name=t.function.name, - description=t.function.description, - parameters=t.function.parameters, # TODO need to unpack - ) - for t in tools - ] - - # Correct casing + add inner thoughts if needed - for func in function_list: - func["parameters"]["type"] = "OBJECT" - for param_name, param_fields in func["parameters"]["properties"].items(): - param_fields["type"] = param_fields["type"].upper() - # Add inner thoughts - if inner_thoughts_in_kwargs: - from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION - - func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = { - "type": "STRING", - "description": INNER_THOUGHTS_KWARG_DESCRIPTION, - } - func["parameters"]["required"].append(INNER_THOUGHTS_KWARG) - - return [{"functionDeclarations": function_list}] - - -def convert_google_ai_response_to_chatcompletion( - response, - model: str, # Required since not returned - input_messages: Optional[List[dict]] = None, # Required if the API doesn't return UsageMetadata - pull_inner_thoughts_from_args: Optional[bool] = True, -) -> ChatCompletionResponse: - """Google AI API response format is not the same as ChatCompletion, requires unpacking - - Example: - { - "candidates": [ - { - "content": { - "parts": [ - { - "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14." - } - ] - } - } - ], - "usageMetadata": { - "promptTokenCount": 9, - "candidatesTokenCount": 27, - "totalTokenCount": 36 - } - } - """ - try: - choices = [] - index = 0 - for candidate in response.candidates: - content = candidate.content - - role = content.role - assert role == "model", f"Unknown role in response: {role}" - - parts = content.parts - # TODO support parts / multimodal - # TODO support parallel tool calling natively - # TODO Alternative here is to throw away everything else except for the first part - for response_message in parts: - # Convert the actual message style to OpenAI style - if response_message.function_call: - function_call = response_message.function_call - function_name = function_call.name - function_args = function_call.args - assert isinstance(function_args, dict), function_args - - # NOTE: this also involves stripping the inner monologue out of the function - if pull_inner_thoughts_from_args: - from letta.local_llm.constants import INNER_THOUGHTS_KWARG - - assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}" - inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG) - assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}" - else: - inner_thoughts = None - - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - tool_calls=[ - ToolCall( - id=get_tool_call_id(), - type="function", - function=FunctionCall( - name=function_name, - arguments=clean_json_string_extra_backslash(json_dumps(function_args)), - ), - ) - ], - ) - - else: - - # Inner thoughts are the content by default - inner_thoughts = response_message.text - - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - ) - - # Google AI API uses different finish reason strings than OpenAI - # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null - # see: 
https://platform.openai.com/docs/guides/text-generation/chat-completions-api - # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER - # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason - finish_reason = candidate.finish_reason.value - if finish_reason == "STOP": - openai_finish_reason = ( - "function_call" - if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0 - else "stop" - ) - elif finish_reason == "MAX_TOKENS": - openai_finish_reason = "length" - elif finish_reason == "SAFETY": - openai_finish_reason = "content_filter" - elif finish_reason == "RECITATION": - openai_finish_reason = "content_filter" - else: - raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}") - - choices.append( - Choice( - finish_reason=openai_finish_reason, - index=index, - message=openai_response_message, - ) - ) - index += 1 - - # if len(choices) > 1: - # raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})") - - # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist? - # "usageMetadata": { - # "promptTokenCount": 9, - # "candidatesTokenCount": 27, - # "totalTokenCount": 36 - # } - if response.usage_metadata: - usage = UsageStatistics( - prompt_tokens=response.usage_metadata.prompt_token_count, - completion_tokens=response.usage_metadata.candidates_token_count, - total_tokens=response.usage_metadata.total_token_count, - ) - else: - # Count it ourselves - assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required" - prompt_tokens = count_tokens(json_dumps(input_messages)) # NOTE: this is a very rough approximation - completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump())) # NOTE: this is also approximate - total_tokens = prompt_tokens + completion_tokens - usage = UsageStatistics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - ) - - response_id = str(uuid.uuid4()) - return ChatCompletionResponse( - id=response_id, - choices=choices, - model=model, # NOTE: Google API doesn't pass back model in the response - created=get_utc_time(), - usage=usage, - ) - except KeyError as e: - raise e - - -# TODO convert 'data' type to pydantic -def google_vertex_chat_completions_request( - model: str, - project_id: str, - region: str, - contents: List[dict], - config: dict, - add_postfunc_model_messages: bool = True, - # NOTE: Google AI API doesn't support mixing parts 'text' and 'function', - # so there's no clean way to put inner thoughts in the same message as a function call - inner_thoughts_in_kwargs: bool = True, -) -> ChatCompletionResponse: - """https://ai.google.dev/docs/function_calling - - From https://ai.google.dev/api/rest#service-endpoint: - "A service endpoint is a base URL that specifies the network address of an API service. - One service might have multiple service endpoints. 
-    This service has the following service endpoint and all URIs below are relative to this service endpoint:
-    https://xxx.googleapis.com
-    """
-
-    from google import genai
-    from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
-
-    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
-    # add dummy model messages to the end of the input
-    if add_postfunc_model_messages:
-        contents = add_dummy_model_messages(contents)
-
-    tool_config = ToolConfig(
-        function_calling_config=FunctionCallingConfig(
-            # ANY mode forces the model to predict only function calls
-            mode=FunctionCallingConfigMode.ANY,
-        )
-    )
-    config["tool_config"] = tool_config.model_dump()
-
-    # make request to client
-    attributes = config if isinstance(config, dict) else {"config": config}
-    attributes.update({"contents": contents})
-    log_event(name="llm_request_sent", attributes={"contents": contents, "config": config})
-    response = client.models.generate_content(
-        model=model,
-        contents=contents,
-        config=config,
-    )
-
-    # convert back response
-    try:
-        return convert_google_ai_response_to_chatcompletion(
-            response=response,
-            model=model,
-            input_messages=contents,
-            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
-        )
-    except Exception as conversion_error:
-        print(f"Error during response conversion: {conversion_error}")
-        raise conversion_error
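Editor's note: both deleted modules carry an identical `add_dummy_model_messages` workaround, because the Gemini API rejects a function result ('tool'/'function' role) that is immediately followed by a 'user' turn. A minimal before/after sketch (the message contents are invented for illustration; `NON_USER_MSG_PREFIX` comes from `letta.constants`):

```python
# Input: a function result directly followed by a user message.
contents = [
    {"role": "user", "parts": [{"text": "Find me a movie."}]},
    {"role": "function", "parts": [{"text": '{"status": "OK"}'}]},
    {"role": "user", "parts": [{"text": "Anything playing nearby?"}]},
]

padded = add_dummy_model_messages(contents)
# Output: a synthetic "model" turn is spliced in between the function output
# and the following user message:
# [
#     {"role": "user", ...},
#     {"role": "function", ...},
#     {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]},
#     {"role": "user", ...},
# ]
```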
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 82d0aea6..05ce7b5e 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -15,7 +15,6 @@ from letta.llm_api.anthropic import (
 from letta.llm_api.aws_bedrock import has_valid_aws_credentials
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
 from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
-from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (
     build_openai_chat_completions_request,
@@ -27,7 +26,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
@@ -314,58 +313,6 @@ def create(
 
         return response
 
-    elif llm_config.model_endpoint_type == "google_ai":
-        if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
-        if not use_tool_naming:
-            raise NotImplementedError("Only tool calling supported on Google AI API requests")
-
-        if functions is not None:
-            tools = [{"type": "function", "function": f} for f in functions]
-            tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
-        else:
-            tools = None
-
-        return google_ai_chat_completions_request(
-            base_url=llm_config.model_endpoint,
-            model=llm_config.model,
-            api_key=model_settings.gemini_api_key,
-            # see structure of payload here: https://ai.google.dev/docs/function_calling
-            data=dict(
-                contents=[m.to_google_ai_dict() for m in messages],
-                tools=tools,
-                generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
-            ),
-            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
-        )
-
-    elif llm_config.model_endpoint_type == "google_vertex":
-        from letta.llm_api.google_vertex import google_vertex_chat_completions_request
-
-        if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
-        if not use_tool_naming:
-            raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
-
-        if functions is not None:
-            tools = [{"type": "function", "function": f} for f in functions]
-            tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
-        else:
-            tools = None
-
-        config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
-
-        return google_vertex_chat_completions_request(
-            model=llm_config.model,
-            project_id=model_settings.google_cloud_project,
-            region=model_settings.google_cloud_location,
-            contents=[m.to_google_ai_dict() for m in messages],
-            config=config,
-            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
-        )
-
     elif llm_config.model_endpoint_type == "anthropic":
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
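Editor's note: the two converters deleted in this diff duplicate the same finish-reason translation, mapping Gemini's `STOP`/`MAX_TOKENS`/`SAFETY`/`RECITATION` onto OpenAI's `stop`/`length`/`content_filter`, with `STOP` promoted to `function_call` when tool calls are present. Restated here as a table-driven helper purely to document the mapping this diff removes (not code from the PR):

```python
def map_gemini_finish_reason(finish_reason: str, has_tool_calls: bool) -> str:
    """Translate a Gemini finish reason into its OpenAI equivalent."""
    if finish_reason == "STOP":
        # A clean stop that produced tool calls is reported as a function call.
        return "function_call" if has_tool_calls else "stop"
    mapping = {
        "MAX_TOKENS": "length",
        "SAFETY": "content_filter",
        "RECITATION": "content_filter",
    }
    if finish_reason not in mapping:
        raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
    return mapping[finish_reason]
```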