feat(gemini): add 3.1 pro preview support (#9553)
Add 3.1 model metadata for Google AI and update Gemini tests/examples to use the new handle.

👾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = {
|
|||||||
"gemini-2.5-computer-use-preview-10-2025": 1048576,
|
"gemini-2.5-computer-use-preview-10-2025": 1048576,
|
||||||
# gemini 3
|
# gemini 3
|
||||||
"gemini-3-pro-preview": 1048576,
|
"gemini-3-pro-preview": 1048576,
|
||||||
|
"gemini-3.1-pro-preview": 1048576,
|
||||||
"gemini-3-flash-preview": 1048576,
|
"gemini-3-flash-preview": 1048576,
|
||||||
# gemini latest aliases
|
# gemini latest aliases
|
||||||
"gemini-flash-latest": 1048576,
|
"gemini-flash-latest": 1048576,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
|
GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
|
||||||
"gemini-3-pro-preview": 1048576,
|
"gemini-3-pro-preview": 1048576,
|
||||||
|
"gemini-3.1-pro-preview": 1048576,
|
||||||
"gemini-3-flash-preview": 1048576,
|
"gemini-3-flash-preview": 1048576,
|
||||||
"gemini-2.5-pro": 1048576,
|
"gemini-2.5-pro": 1048576,
|
||||||
"gemini-2.5-flash": 1048576,
|
"gemini-2.5-flash": 1048576,
|
||||||
|
|||||||
@@ -14109,6 +14109,48 @@
|
|||||||
"supports_web_search": true,
|
"supports_web_search": true,
|
||||||
"tpm": 800000
|
"tpm": 800000
|
||||||
},
|
},
|
||||||
|
"gemini/gemini-3.1-pro-preview": {
|
||||||
|
"cache_read_input_token_cost": 2e-7,
|
||||||
|
"cache_read_input_token_cost_above_200k_tokens": 4e-7,
|
||||||
|
"input_cost_per_token": 2e-6,
|
||||||
|
"input_cost_per_token_above_200k_tokens": 4e-6,
|
||||||
|
"input_cost_per_token_batches": 1e-6,
|
||||||
|
"litellm_provider": "gemini",
|
||||||
|
"max_audio_length_hours": 8.4,
|
||||||
|
"max_audio_per_prompt": 1,
|
||||||
|
"max_images_per_prompt": 3000,
|
||||||
|
"max_input_tokens": 1048576,
|
||||||
|
"max_output_tokens": 65535,
|
||||||
|
"max_pdf_size_mb": 30,
|
||||||
|
"max_tokens": 65535,
|
||||||
|
"max_video_length": 1,
|
||||||
|
"max_videos_per_prompt": 10,
|
||||||
|
"mode": "chat",
|
||||||
|
"output_cost_per_token": 1.2e-5,
|
||||||
|
"output_cost_per_token_above_200k_tokens": 1.8e-5,
|
||||||
|
"output_cost_per_token_batches": 6e-6,
|
||||||
|
"rpm": 2000,
|
||||||
|
"source": "https://ai.google.dev/pricing/gemini-3",
|
||||||
|
"supported_endpoints": [
|
||||||
|
"/v1/chat/completions",
|
||||||
|
"/v1/completions",
|
||||||
|
"/v1/batch"
|
||||||
|
],
|
||||||
|
"supported_modalities": ["text", "image", "audio", "video"],
|
||||||
|
"supported_output_modalities": ["text"],
|
||||||
|
"supports_audio_input": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_pdf_input": true,
|
||||||
|
"supports_prompt_caching": true,
|
||||||
|
"supports_reasoning": true,
|
||||||
|
"supports_response_schema": true,
|
||||||
|
"supports_system_messages": true,
|
||||||
|
"supports_tool_choice": true,
|
||||||
|
"supports_video_input": true,
|
||||||
|
"supports_vision": true,
|
||||||
|
"supports_web_search": true,
|
||||||
|
"tpm": 800000
|
||||||
|
},
|
||||||
"gemini/gemini-3-flash-preview": {
|
"gemini/gemini-3-flash-preview": {
|
||||||
"cache_read_input_token_cost": 5e-8,
|
"cache_read_input_token_cost": 5e-8,
|
||||||
"input_cost_per_audio_token": 1e-6,
|
"input_cost_per_audio_token": 1e-6,
|
||||||
|
|||||||
21
letta/test_gemini.py
Normal file
21
letta/test_gemini.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from letta_client import Letta
|
||||||
|
|
||||||
|
|
||||||
|
def create_agent() -> None:
|
||||||
|
client = Letta(base_url="http://localhost:8283")
|
||||||
|
|
||||||
|
agent_state = client.agents.create(
|
||||||
|
name="test-gemini-3-pro-agent",
|
||||||
|
model="google_ai/gemini-3.1-pro-preview",
|
||||||
|
embedding="openai/text-embedding-3-small",
|
||||||
|
context_window_limit=16000,
|
||||||
|
)
|
||||||
|
print("Created agent: ", agent_state)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
create_agent()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [
|
|||||||
# OpenAI gpt-4o with prompt caching (Chat Completions API)
|
# OpenAI gpt-4o with prompt caching (Chat Completions API)
|
||||||
("openai/gpt-4o", {"provider_type": "openai"}),
|
("openai/gpt-4o", {"provider_type": "openai"}),
|
||||||
# Gemini 3 Pro Preview with context caching
|
# Gemini 3 Pro Preview with context caching
|
||||||
("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}),
|
("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}),
|
||||||
]
|
]
|
||||||
|
|
||||||
REASONING_TEST_CONFIGS = [
|
REASONING_TEST_CONFIGS = [
|
||||||
@@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [
|
|||||||
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
|
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
|
||||||
# Gemini 3 Pro Preview with thinking enabled
|
# Gemini 3 Pro Preview with thinking enabled
|
||||||
(
|
(
|
||||||
"google_ai/gemini-3-pro-preview",
|
"google_ai/gemini-3.1-pro-preview",
|
||||||
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
|
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [
|
|||||||
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
|
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
|
||||||
# This suggests 3 Pro Preview may require explicit caching instead
|
# This suggests 3 Pro Preview may require explicit caching instead
|
||||||
pytest.param(
|
pytest.param(
|
||||||
"google_ai/gemini-3-pro-preview",
|
"google_ai/gemini-3.1-pro-preview",
|
||||||
{},
|
{},
|
||||||
2048, # Min tokens for 3 Pro Preview
|
2048, # Min tokens for 3 Pro Preview
|
||||||
"cached_tokens", # Field name (normalized from cached_content_token_count)
|
"cached_tokens", # Field name (normalized from cached_content_token_count)
|
||||||
None, # No separate write field
|
None, # No separate write field
|
||||||
id="gemini-3-pro-preview-implicit",
|
id="gemini-3.1-pro-preview-implicit",
|
||||||
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
|
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
@@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta):
|
|||||||
Since implicit caching is stochastic (depends on routing, timing, etc.), we send
|
Since implicit caching is stochastic (depends on routing, timing, etc.), we send
|
||||||
multiple messages in quick succession and check if ANY of them hit the cache.
|
multiple messages in quick succession and check if ANY of them hit the cache.
|
||||||
"""
|
"""
|
||||||
model = "google_ai/gemini-3-pro-preview"
|
model = "google_ai/gemini-3.1-pro-preview"
|
||||||
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
|
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user