feat(gemini): add 3.1 pro preview support (#9553)
Add 3.1 model metadata for Google AI and update Gemini tests/examples to use the new handle.

👾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = {
|
||||
"gemini-2.5-computer-use-preview-10-2025": 1048576,
|
||||
# gemini 3
|
||||
"gemini-3-pro-preview": 1048576,
|
||||
"gemini-3.1-pro-preview": 1048576,
|
||||
"gemini-3-flash-preview": 1048576,
|
||||
# gemini latest aliases
|
||||
"gemini-flash-latest": 1048576,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
|
||||
"gemini-3-pro-preview": 1048576,
|
||||
"gemini-3.1-pro-preview": 1048576,
|
||||
"gemini-3-flash-preview": 1048576,
|
||||
"gemini-2.5-pro": 1048576,
|
||||
"gemini-2.5-flash": 1048576,
|
||||
|
||||
@@ -14109,6 +14109,48 @@
|
||||
"supports_web_search": true,
|
||||
"tpm": 800000
|
||||
},
|
||||
"gemini/gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-7,
|
||||
"cache_read_input_token_cost_above_200k_tokens": 4e-7,
|
||||
"input_cost_per_token": 2e-6,
|
||||
"input_cost_per_token_above_200k_tokens": 4e-6,
|
||||
"input_cost_per_token_batches": 1e-6,
|
||||
"litellm_provider": "gemini",
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65535,
|
||||
"max_pdf_size_mb": 30,
|
||||
"max_tokens": 65535,
|
||||
"max_video_length": 1,
|
||||
"max_videos_per_prompt": 10,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.2e-5,
|
||||
"output_cost_per_token_above_200k_tokens": 1.8e-5,
|
||||
"output_cost_per_token_batches": 6e-6,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing/gemini-3",
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/completions",
|
||||
"/v1/batch"
|
||||
],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supports_audio_input": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"tpm": 800000
|
||||
},
|
||||
"gemini/gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-8,
|
||||
"input_cost_per_audio_token": 1e-6,
|
||||
|
||||
21
letta/test_gemini.py
Normal file
21
letta/test_gemini.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from letta_client import Letta
|
||||
|
||||
|
||||
def create_agent() -> None:
    """Create a test agent backed by the Gemini 3.1 Pro preview model.

    Connects to a Letta server assumed to be running locally on port 8283
    and prints the resulting agent state. Raises a connection error if the
    server is not reachable.
    """
    client = Letta(base_url="http://localhost:8283")

    agent_state = client.agents.create(
        # Fixed: name said "gemini-3-pro" but the model used is 3.1.
        name="test-gemini-3.1-pro-agent",
        model="google_ai/gemini-3.1-pro-preview",
        embedding="openai/text-embedding-3-small",
        context_window_limit=16000,
    )
    print("Created agent: ", agent_state)
|
||||
|
||||
|
||||
def main() -> None:
    """Script entry point: create the Gemini 3.1 Pro preview test agent."""
    create_agent()


if __name__ == "__main__":
    main()
|
||||
@@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [
|
||||
# OpenAI gpt-4o with prompt caching (Chat Completions API)
|
||||
("openai/gpt-4o", {"provider_type": "openai"}),
|
||||
# Gemini 3 Pro Preview and 3.1 Pro Preview with context caching
|
||||
("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}),
|
||||
("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}),
|
||||
]
|
||||
|
||||
REASONING_TEST_CONFIGS = [
|
||||
@@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [
|
||||
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
|
||||
# Gemini 3.1 Pro Preview with thinking enabled
|
||||
(
|
||||
"google_ai/gemini-3-pro-preview",
|
||||
"google_ai/gemini-3.1-pro-preview",
|
||||
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
|
||||
),
|
||||
]
|
||||
|
||||
@@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [
|
||||
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
|
||||
# This suggests 3 Pro Preview may require explicit caching instead
|
||||
pytest.param(
|
||||
"google_ai/gemini-3-pro-preview",
|
||||
"google_ai/gemini-3.1-pro-preview",
|
||||
{},
|
||||
2048, # Min tokens for 3.1 Pro Preview
|
||||
"cached_tokens", # Field name (normalized from cached_content_token_count)
|
||||
None, # No separate write field
|
||||
id="gemini-3-pro-preview-implicit",
|
||||
id="gemini-3.1-pro-preview-implicit",
|
||||
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
|
||||
),
|
||||
]
|
||||
@@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta):
|
||||
Since implicit caching is stochastic (depends on routing, timing, etc.), we send
|
||||
multiple messages in quick succession and check if ANY of them hit the cache.
|
||||
"""
|
||||
model = "google_ai/gemini-3-pro-preview"
|
||||
model = "google_ai/gemini-3.1-pro-preview"
|
||||
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user