diff --git a/.skills/llm-provider-usage-statistics/SKILL.md b/.skills/llm-provider-usage-statistics/SKILL.md new file mode 100644 index 00000000..ff06467c --- /dev/null +++ b/.skills/llm-provider-usage-statistics/SKILL.md @@ -0,0 +1,43 @@ +--- +name: llm-provider-usage-statistics +description: Reference guide for token counting and prefix caching across LLM providers (OpenAI, Anthropic, Gemini). Use when debugging token counts or optimizing prefix caching. +--- + +# LLM Provider Usage Statistics + +Reference documentation for how different LLM providers report token usage. + +## Quick Reference: Token Counting Semantics + +| Provider | `input_tokens` meaning | Cache tokens | Must add cache to get total? | +|----------|------------------------|--------------|------------------------------| +| OpenAI | TOTAL (includes cached) | `cached_tokens` is subset | No | +| Anthropic | NON-cached only | `cache_read_input_tokens` + `cache_creation_input_tokens` | **Yes** | +| Gemini | TOTAL (includes cached) | `cached_content_token_count` is subset | No | + +**Critical difference:** Anthropic's `input_tokens` excludes cached tokens, so you must add them: +``` +total_input = input_tokens + cache_read_input_tokens + cache_creation_input_tokens +``` + +## Quick Reference: Prefix Caching + +| Provider | Min tokens | How to enable | TTL | +|----------|-----------|---------------|-----| +| OpenAI | 1,024 | Automatic | ~5-10 min | +| Anthropic | 1,024 | Requires `cache_control` breakpoints | 5 min | +| Gemini 2.0+ | 1,024 | Automatic (implicit) | Variable | + +## Quick Reference: Reasoning/Thinking Tokens + +| Provider | Field name | Models | +|----------|-----------|--------| +| OpenAI | `reasoning_tokens` | o1, o3 models | +| Anthropic | N/A | (thinking is in content blocks, not usage) | +| Gemini | `thoughts_token_count` | Gemini 2.0 with thinking enabled | + +## Provider Reference Files + +- **OpenAI:** [references/openai.md](references/openai.md) - Chat Completions vs Responses 
API, reasoning models, cached_tokens +- **Anthropic:** [references/anthropic.md](references/anthropic.md) - cache_control setup, beta headers, cache token fields +- **Gemini:** [references/gemini.md](references/gemini.md) - implicit caching, thinking tokens, usage_metadata fields diff --git a/.skills/llm-provider-usage-statistics/references/anthropic.md b/.skills/llm-provider-usage-statistics/references/anthropic.md new file mode 100644 index 00000000..7dfdd6fb --- /dev/null +++ b/.skills/llm-provider-usage-statistics/references/anthropic.md @@ -0,0 +1,83 @@ +# Anthropic Usage Statistics + +## Response Format + +``` +response.usage.input_tokens # NON-cached input tokens only +response.usage.output_tokens # Output tokens +response.usage.cache_read_input_tokens # Tokens read from cache +response.usage.cache_creation_input_tokens # Tokens written to cache +``` + +## Critical: Token Calculation + +**Anthropic's `input_tokens` is NOT the total.** To get total input tokens: + +```python +total_input = input_tokens + cache_read_input_tokens + cache_creation_input_tokens +``` + +This is different from OpenAI/Gemini where `prompt_tokens` is already the total. 
+ +## Prefix Caching (Prompt Caching) + +**Requirements:** +- Minimum 1,024 tokens for Claude 3.5 Haiku/Sonnet +- Minimum 2,048 tokens for Claude 3 Opus +- Requires explicit `cache_control` breakpoints in messages +- TTL: 5 minutes + +**How to enable:** +Add `cache_control` to message content: +```python +{ + "role": "user", + "content": [ + { + "type": "text", + "text": "...", + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +**Beta header required:** +```python +betas = ["prompt-caching-2024-07-31"] +``` + +## Cache Behavior + +- `cache_creation_input_tokens`: Tokens that were cached on this request (cache write) +- `cache_read_input_tokens`: Tokens that were read from existing cache (cache hit) +- On first request: expect `cache_creation_input_tokens > 0` +- On subsequent requests with same prefix: expect `cache_read_input_tokens > 0` + +## Streaming + +In streaming mode, usage is reported in two events: + +1. **`message_start`**: Initial usage (may have cache info) + ```python + event.message.usage.input_tokens + event.message.usage.output_tokens + event.message.usage.cache_read_input_tokens + event.message.usage.cache_creation_input_tokens + ``` + +2. **`message_delta`**: Cumulative output tokens + ```python + event.usage.output_tokens # This is CUMULATIVE, not incremental + ``` + +**Important:** Per Anthropic docs, `message_delta` token counts are cumulative, so assign (don't accumulate). 
+ +## Letta Implementation + +- **Client:** `letta/llm_api/anthropic_client.py` +- **Streaming interfaces:** + - `letta/interfaces/anthropic_streaming_interface.py` + - `letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py` (tracks cache tokens) +- **Extract method:** `AnthropicClient.extract_usage_statistics()` +- **Cache control:** `_add_cache_control_to_system_message()`, `_add_cache_control_to_messages()` diff --git a/.skills/llm-provider-usage-statistics/references/gemini.md b/.skills/llm-provider-usage-statistics/references/gemini.md new file mode 100644 index 00000000..2df708f5 --- /dev/null +++ b/.skills/llm-provider-usage-statistics/references/gemini.md @@ -0,0 +1,81 @@ +# Gemini Usage Statistics + +## Response Format + +Gemini returns usage in `usage_metadata`: + +``` +response.usage_metadata.prompt_token_count # Total input tokens +response.usage_metadata.candidates_token_count # Output tokens +response.usage_metadata.total_token_count # Sum +response.usage_metadata.cached_content_token_count # Tokens from cache (optional) +response.usage_metadata.thoughts_token_count # Reasoning tokens (optional) +``` + +## Token Counting + +- `prompt_token_count` is the TOTAL (includes cached) +- `cached_content_token_count` is a subset (when present) +- Similar to OpenAI's semantics + +## Implicit Caching (Gemini 2.0+) + +**Requirements:** +- Minimum 1,024 tokens +- Automatic (no opt-in required) +- Available on Gemini 2.0 Flash and later models + +**Behavior:** +- Caching is probabilistic and server-side +- `cached_content_token_count` may or may not be present +- When present, indicates tokens that were served from cache + +**Note:** Unlike Anthropic, Gemini doesn't have explicit cache_control. Caching is implicit and managed by Google's infrastructure. 
+ +## Reasoning/Thinking Tokens + +For models with extended thinking (like Gemini 2.0 with thinking enabled): +- `thoughts_token_count` reports tokens used for reasoning +- These are similar to OpenAI's `reasoning_tokens` + +**Enabling thinking:** +```python +generation_config = { + "thinking_config": { + "thinking_budget": 1024 # Max thinking tokens + } +} +``` + +## Streaming + +In streaming mode: +- `usage_metadata` is typically in the **final chunk** +- Same fields as non-streaming +- May not be present in intermediate chunks + +**Important:** `stream_async()` returns an async generator (not awaitable): +```python +# Correct: +stream = client.stream_async(request_data, llm_config) +async for chunk in stream: + ... + +# Incorrect (will error): +stream = await client.stream_async(...) # TypeError! +``` + +## APIs + +Gemini has two APIs: +- **Google AI (google_ai):** Uses `google.genai` SDK +- **Vertex AI (google_vertex):** Uses same SDK with different auth + +Both share the same response format. 
+ +## Letta Implementation + +- **Client:** `letta/llm_api/google_vertex_client.py` (handles both google_ai and google_vertex) +- **Streaming interface:** `letta/interfaces/gemini_streaming_interface.py` +- **Extract method:** `GoogleVertexClient.extract_usage_statistics()` +- Response is a `GenerateContentResponse` object with `.usage_metadata` attribute diff --git a/.skills/llm-provider-usage-statistics/references/openai.md b/.skills/llm-provider-usage-statistics/references/openai.md new file mode 100644 index 00000000..25913791 --- /dev/null +++ b/.skills/llm-provider-usage-statistics/references/openai.md @@ -0,0 +1,61 @@ +# OpenAI Usage Statistics + +## APIs and Response Formats + +OpenAI has two APIs with different response structures: + +### Chat Completions API +``` +response.usage.prompt_tokens # Total input tokens (includes cached) +response.usage.completion_tokens # Output tokens +response.usage.total_tokens # Sum +response.usage.prompt_tokens_details.cached_tokens # Subset that was cached +response.usage.completion_tokens_details.reasoning_tokens # For o1/o3 models +``` + +### Responses API (newer) +``` +response.usage.input_tokens # Total input tokens +response.usage.output_tokens # Output tokens +response.usage.total_tokens # Sum +response.usage.input_tokens_details.cached_tokens # Subset that was cached +response.usage.output_tokens_details.reasoning_tokens # For reasoning models +``` + +## Prefix Caching + +**Requirements:** +- Minimum 1,024 tokens in the prefix +- Automatic (no opt-in required) +- Cached in 128-token increments +- TTL: approximately 5-10 minutes of inactivity + +**Supported models:** GPT-4o, GPT-4o-mini, o1, o1-mini, o3-mini + +**Cache behavior:** +- `cached_tokens` will be a multiple of 128 +- Cache hit means those tokens were not re-processed +- Cost: cached tokens are cheaper than non-cached + +## Reasoning Models (o1, o3) + +For reasoning models, additional tokens are used for "thinking": +- `reasoning_tokens` in 
`completion_tokens_details` +- These are output tokens used for internal reasoning +- Not visible in the response content + +## Streaming + +In streaming mode, usage is reported in the **final chunk** when `stream_options.include_usage=True`: +```python +request_data["stream_options"] = {"include_usage": True} +``` + +The final chunk will have `chunk.usage` with the same structure as non-streaming. + +## Letta Implementation + +- **Client:** `letta/llm_api/openai_client.py` +- **Streaming interface:** `letta/interfaces/openai_streaming_interface.py` +- **Extract method:** `OpenAIClient.extract_usage_statistics()` +- Uses OpenAI SDK's pydantic models (`ChatCompletion`) for type-safe parsing diff --git a/alembic/versions/297e8217e952_nullable_embedding_for_archives_and_.py b/alembic/versions/297e8217e952_nullable_embedding_for_archives_and_.py new file mode 100644 index 00000000..69aa8f05 --- /dev/null +++ b/alembic/versions/297e8217e952_nullable_embedding_for_archives_and_.py @@ -0,0 +1,36 @@ +"""nullable embedding for archives and passages + +Revision ID: 297e8217e952 +Revises: 308a180244fc +Create Date: 2026-01-20 14:11:21.137232 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "297e8217e952" +down_revision: Union[str, None] = "308a180244fc" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column("archival_passages", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True) + op.alter_column("archives", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True) + op.alter_column("source_passages", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column("source_passages", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=False) + op.alter_column("archives", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=False) + op.alter_column("archival_passages", "embedding_config", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=False) + # ### end Alembic commands ### diff --git a/alembic/versions/308a180244fc_last_synced_column_for_providers.py b/alembic/versions/308a180244fc_last_synced_column_for_providers.py new file mode 100644 index 00000000..03aa169c --- /dev/null +++ b/alembic/versions/308a180244fc_last_synced_column_for_providers.py @@ -0,0 +1,31 @@ +"""last_synced column for providers + +Revision ID: 308a180244fc +Revises: 82feb220a9b8 +Create Date: 2026-01-05 18:54:15.996786 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "308a180244fc" +down_revision: Union[str, None] = "82feb220a9b8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("providers", sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("providers", "last_synced") + # ### end Alembic commands ### diff --git a/alembic/versions/9275f62ad282_add_v2_protocol_fields_to_provider_traces.py b/alembic/versions/9275f62ad282_add_v2_protocol_fields_to_provider_traces.py new file mode 100644 index 00000000..97fa2f73 --- /dev/null +++ b/alembic/versions/9275f62ad282_add_v2_protocol_fields_to_provider_traces.py @@ -0,0 +1,32 @@ +"""Add v2 protocol fields to provider_traces + +Revision ID: 9275f62ad282 +Revises: 297e8217e952 +Create Date: 2026-01-22 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +revision: str = "9275f62ad282" +down_revision: Union[str, None] = "297e8217e952" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column("provider_traces", sa.Column("org_id", sa.String(), nullable=True)) + op.add_column("provider_traces", sa.Column("user_id", sa.String(), nullable=True)) + op.add_column("provider_traces", sa.Column("compaction_settings", sa.JSON(), nullable=True)) + op.add_column("provider_traces", sa.Column("llm_config", sa.JSON(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("provider_traces", "llm_config") + op.drop_column("provider_traces", "compaction_settings") + op.drop_column("provider_traces", "user_id") + op.drop_column("provider_traces", "org_id") diff --git a/alembic/versions/a1b2c3d4e5f8_create_provider_trace_metadata_table.py b/alembic/versions/a1b2c3d4e5f8_create_provider_trace_metadata_table.py new file mode 100644 index 00000000..c4d3f1c7 --- /dev/null +++ b/alembic/versions/a1b2c3d4e5f8_create_provider_trace_metadata_table.py @@ -0,0 +1,59 @@ +"""create provider_trace_metadata table + +Revision ID: a1b2c3d4e5f8 +Revises: 9275f62ad282 +Create Date: 2026-01-28 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op +from letta.settings import settings + 
+revision: str = "a1b2c3d4e5f8" +down_revision: Union[str, None] = "9275f62ad282" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + if not settings.letta_pg_uri_no_default: + return + + op.create_table( + "provider_trace_metadata", + sa.Column("id", sa.String(), nullable=False), + sa.Column("step_id", sa.String(), nullable=True), + sa.Column("agent_id", sa.String(), nullable=True), + sa.Column("agent_tags", sa.JSON(), nullable=True), + sa.Column("call_type", sa.String(), nullable=True), + sa.Column("run_id", sa.String(), nullable=True), + sa.Column("source", sa.String(), nullable=True), + sa.Column("org_id", sa.String(), nullable=True), + sa.Column("user_id", sa.String(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("is_deleted", sa.Boolean(), server_default=sa.text("FALSE"), nullable=False), + sa.Column("_created_by_id", sa.String(), nullable=True), + sa.Column("_last_updated_by_id", sa.String(), nullable=True), + sa.Column("organization_id", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["organization_id"], + ["organizations.id"], + ), + sa.PrimaryKeyConstraint("created_at", "id"), + ) + op.create_index("ix_provider_trace_metadata_step_id", "provider_trace_metadata", ["step_id"], unique=False) + op.create_index("ix_provider_trace_metadata_id", "provider_trace_metadata", ["id"], unique=True) + + +def downgrade() -> None: + if not settings.letta_pg_uri_no_default: + return + + op.drop_index("ix_provider_trace_metadata_id", table_name="provider_trace_metadata") + op.drop_index("ix_provider_trace_metadata_step_id", table_name="provider_trace_metadata") + op.drop_table("provider_trace_metadata") diff --git a/dev-compose.yaml b/dev-compose.yaml index 81d08478..c1127aa0 100644 --- 
a/dev-compose.yaml +++ b/dev-compose.yaml @@ -14,7 +14,7 @@ services: - ./.persist/pgdata-test:/var/lib/postgresql/data - ./init.sql:/docker-entrypoint-initdb.d/init.sql ports: - - "5432:5432" + - '5432:5432' letta_server: image: letta/letta:latest hostname: letta @@ -25,8 +25,8 @@ services: depends_on: - letta_db ports: - - "8083:8083" - - "8283:8283" + - '8083:8083' + - '8283:8283' environment: - LETTA_PG_DB=${LETTA_PG_DB:-letta} - LETTA_PG_USER=${LETTA_PG_USER:-letta} diff --git a/fern/openapi.json b/fern/openapi.json new file mode 100644 index 00000000..42154985 --- /dev/null +++ b/fern/openapi.json @@ -0,0 +1,48587 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "Letta API", + "version": "1.0.0" + }, + "servers": [ + { + "url": "https://app.letta.com", + "description": "Letta Cloud" + }, + { + "url": "http://localhost:8283", + "description": "Self-hosted" + } + ], + "security": [ + { + "bearerAuth": [] + } + ], + "paths": { + "/v1/archives/": { + "post": { + "tags": ["archives"], + "summary": "Create Archive", + "description": "Create a new archive.", + "operationId": "create_archive", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArchiveCreateRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Archive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["archives"], + "summary": "List Archives", + "description": "Get a list of all archives for the current organization with optional filters and pagination.", + "operationId": "list_archives", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, 
+ { + "type": "null" + } + ], + "description": "Archive ID cursor for pagination. Returns archives that come before this archive ID in the specified sort order", + "title": "Before" + }, + "description": "Archive ID cursor for pagination. Returns archives that come before this archive ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Archive ID cursor for pagination. Returns archives that come after this archive ID in the specified sort order", + "title": "After" + }, + "description": "Archive ID cursor for pagination. Returns archives that come after this archive ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of archives to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of archives to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for archives by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for archives by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by archive name (exact match)", + "title": "Name" + }, + "description": "Filter by archive name (exact match)" + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Only archives attached to this agent ID", + "title": "Agent Id" + }, + "description": "Only archives attached to this agent ID" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Archive" + }, + "title": "Response List Archives" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/archives/{archive_id}": { + "get": { + "tags": ["archives"], + "summary": "Retrieve Archive", + "description": "Get a single archive by its ID.", + "operationId": "retrieve_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + 
} + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Archive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["archives"], + "summary": "Modify Archive", + "description": "Update an existing archive's name and/or description.", + "operationId": "modify_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArchiveUpdateRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Archive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["archives"], + "summary": "Delete Archive", + "description": "Delete an archive by its ID.", + "operationId": "delete_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the 
format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + } + ], + "responses": { + "204": { + "description": "Successful Response" + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/archives/{archive_id}/agents": { + "get": { + "tags": ["archives"], + "summary": "List Agents For Archive", + "description": "Get a list of agents that have access to an archive with pagination support.", + "operationId": "list_agents_for_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order", + "title": "Before" + }, + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order", + "title": "After" + }, + "description": "Agent ID cursor for pagination. 
Returns agents that come after this agent ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of agents to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of agents to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "enum": [ + "agent.blocks", + "agent.identities", + "agent.managed_group", + "agent.pending_approval", + "agent.secrets", + "agent.sources", + "agent.tags", + "agent.tools" + ], + "type": "string" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + }, + "title": "Response List Agents For Archive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/archives/{archive_id}/passages": { + "post": { + "tags": ["archives"], + "summary": "Create Passage In Archive", + "description": "Create a new passage in an archive.\n\nThis adds a passage to the archive and creates embeddings for vector storage.", + "operationId": "create_passage_in_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PassageCreateRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Passage" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/archives/{archive_id}/passages/batch": { + "post": { + "tags": ["archives"], + "summary": "Create Passages In Archive", + "description": "Create multiple passages in an archive.\n\nThis adds passages to the archive and 
creates embeddings for vector storage.", + "operationId": "create_passages_in_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PassageBatchCreateRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Passage" + }, + "title": "Response Create Passages In Archive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/archives/{archive_id}/passages/{passage_id}": { + "delete": { + "tags": ["archives"], + "summary": "Delete Passage From Archive", + "description": "Delete a passage from an archive.\n\nThis permanently removes the passage from both the database and vector storage (if applicable).", + "operationId": "delete_passage_from_archive", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^archive-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the archive in the format 'archive-'", + "examples": ["archive-123e4567-e89b-42d3-8456-426614174000"], + "title": "Archive Id" + }, + "description": "The ID of the archive in the format 'archive-'" 
+ }, + { + "name": "passage_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^passage-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the passage in the format 'passage-'", + "examples": ["passage-123e4567-e89b-42d3-8456-426614174000"], + "title": "Passage Id" + }, + "description": "The ID of the passage in the format 'passage-'" + } + ], + "responses": { + "204": { + "description": "Successful Response" + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/{tool_id}": { + "delete": { + "tags": ["tools"], + "summary": "Delete Tool", + "description": "Delete a tool by name", + "operationId": "delete_tool", + "parameters": [ + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^tool-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the tool in the format 'tool-'", + "examples": ["tool-123e4567-e89b-42d3-8456-426614174000"], + "title": "Tool Id" + }, + "description": "The ID of the tool in the format 'tool-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["tools"], + "summary": "Retrieve Tool", + "description": "Get a tool by ID", + "operationId": "retrieve_tool", + "parameters": [ + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": 
"^tool-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the tool in the format 'tool-'", + "examples": ["tool-123e4567-e89b-42d3-8456-426614174000"], + "title": "Tool Id" + }, + "description": "The ID of the tool in the format 'tool-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["tools"], + "summary": "Modify Tool", + "description": "Update an existing tool", + "operationId": "modify_tool", + "parameters": [ + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^tool-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the tool in the format 'tool-'", + "examples": ["tool-123e4567-e89b-42d3-8456-426614174000"], + "title": "Tool Id" + }, + "description": "The ID of the tool in the format 'tool-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/count": { + "get": { + "tags": ["tools"], + "summary": "Count Tools", + "description": "Get a count of all tools available to agents belonging to the org of the user.", + "operationId": "count_tools", + "parameters": [ + { + 
"name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + } + }, + { + "name": "names", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by specific tool names", + "title": "Names" + }, + "description": "Filter by specific tool names" + }, + { + "name": "tool_ids", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by specific tool IDs - accepts repeated params or comma-separated values", + "title": "Tool Ids" + }, + "description": "Filter by specific tool IDs - accepts repeated params or comma-separated values" + }, + { + "name": "search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search tool names (case-insensitive partial match)", + "title": "Search" + }, + "description": "Search tool names (case-insensitive partial match)" + }, + { + "name": "tool_types", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by tool type(s) - accepts repeated params or comma-separated values", + "title": "Tool Types" + }, + "description": "Filter by tool type(s) - accepts repeated params or comma-separated values" + }, + { + "name": "exclude_tool_types", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Tool type(s) to exclude - accepts repeated params or comma-separated values", + "title": "Exclude Tool Types" + }, + "description": "Tool type(s) to exclude - accepts 
repeated params or comma-separated values" + }, + { + "name": "return_only_letta_tools", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Count only tools with tool_type starting with 'letta_'", + "default": false, + "title": "Return Only Letta Tools" + }, + "description": "Count only tools with tool_type starting with 'letta_'" + }, + { + "name": "exclude_letta_tools", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Exclude built-in Letta tools from the count", + "default": false, + "title": "Exclude Letta Tools" + }, + "description": "Exclude built-in Letta tools from the count" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Tools" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/": { + "get": { + "tags": ["tools"], + "summary": "List Tools", + "description": "Get a list of all tools available to agents.", + "operationId": "list_tools", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tool ID cursor for pagination. Returns tools that come before this tool ID in the specified sort order", + "title": "Before" + }, + "description": "Tool ID cursor for pagination. Returns tools that come before this tool ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tool ID cursor for pagination. 
Returns tools that come after this tool ID in the specified sort order", + "title": "After" + }, + "description": "Tool ID cursor for pagination. Returns tools that come after this tool ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of tools to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of tools to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for tools by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for tools by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by single tool name", + "title": "Name" + }, + "description": "Filter by single tool name" + }, + { + "name": "names", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by specific tool names", + "title": "Names" + }, + "description": "Filter by specific tool names" + }, + { + "name": "tool_ids", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by specific tool IDs - accepts repeated params or 
comma-separated values", + "title": "Tool Ids" + }, + "description": "Filter by specific tool IDs - accepts repeated params or comma-separated values" + }, + { + "name": "search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search tool names (case-insensitive partial match)", + "title": "Search" + }, + "description": "Search tool names (case-insensitive partial match)" + }, + { + "name": "tool_types", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by tool type(s) - accepts repeated params or comma-separated values", + "title": "Tool Types" + }, + "description": "Filter by tool type(s) - accepts repeated params or comma-separated values" + }, + { + "name": "exclude_tool_types", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Tool type(s) to exclude - accepts repeated params or comma-separated values", + "title": "Exclude Tool Types" + }, + "description": "Tool type(s) to exclude - accepts repeated params or comma-separated values" + }, + { + "name": "return_only_letta_tools", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Return only tools with tool_type starting with 'letta_'", + "default": false, + "title": "Return Only Letta Tools" + }, + "description": "Return only tools with tool_type starting with 'letta_'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "title": "Response List Tools" + } + } + } + }, + "422": { + "description": "Validation Error", + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["tools"], + "summary": "Create Tool", + "description": "Create a new tool", + "operationId": "create_tool", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "put": { + "tags": ["tools"], + "summary": "Upsert Tool", + "description": "Create or update a tool", + "operationId": "upsert_tool", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/search": { + "post": { + "tags": ["tools"], + "summary": "Search Tools", + "description": "Search tools using semantic search.\n\nRequires tool embedding to be enabled (embed_tools=True). 
Uses vector search,\nfull-text search, or hybrid mode to find tools matching the query.\n\nReturns tools ranked by relevance with their search scores.", + "operationId": "search_tools", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolSearchRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolSearchResult" + }, + "title": "Response Search Tools" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/add-base-tools": { + "post": { + "tags": ["tools"], + "summary": "Upsert Base Tools", + "description": "Upsert base tools", + "operationId": "add_base_tools", + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "title": "Response Add Base Tools" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/run": { + "post": { + "tags": ["tools"], + "summary": "Run Tool From Source", + "description": "Attempt to build a tool from source, then run it on the provided arguments", + "operationId": "run_tool_from_source", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolRunFromSource" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/ToolReturnMessage" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers": { + "get": { + "tags": ["tools"], + "summary": "List Mcp Servers", + "description": "Get a list of all configured MCP servers", + "operationId": "list_mcp_servers", + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ] + }, + "title": "Response List Mcp Servers" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "put": { + "tags": ["tools"], + "summary": "Add Mcp Server To Config", + "description": "Add a new MCP server to the Letta MCP server config", + "operationId": "add_mcp_server", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ] + }, + "title": "Response Add Mcp Server" + 
} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/{mcp_server_name}/tools": { + "get": { + "tags": ["tools"], + "summary": "List Mcp Tools By Server", + "description": "Get a list of all tools for a specific MCP server", + "operationId": "list_mcp_tools_by_server", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Name" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MCPTool" + }, + "title": "Response List Mcp Tools By Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/{mcp_server_name}/resync": { + "post": { + "tags": ["tools"], + "summary": "Resync Mcp Server Tools", + "description": "Resync tools for an MCP server by:\n1. Fetching current tools from the MCP server\n2. Deleting tools that no longer exist on the server\n3. Updating schemas for existing tools\n4. 
Adding new tools from the server\n\nReturns a summary of changes made.", + "operationId": "resync_mcp_server_tools", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Name" + } + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/{mcp_server_name}/{mcp_tool_name}": { + "post": { + "tags": ["tools"], + "summary": "Add Mcp Tool", + "description": "Register a new MCP tool as a Letta server by MCP server + tool name", + "operationId": "add_mcp_tool", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Name" + } + }, + { + "name": "mcp_tool_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Tool Name" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/{mcp_server_name}": { + "patch": { + "tags": ["tools"], + "summary": "Update Mcp Server", + "description": "Update an existing MCP server configuration", + "operationId": "update_mcp_server", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": 
{ + "type": "string", + "title": "Mcp Server Name" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/letta__schemas__mcp__UpdateStdioMCPServer" + }, + { + "$ref": "#/components/schemas/letta__schemas__mcp__UpdateSSEMCPServer" + }, + { + "$ref": "#/components/schemas/letta__schemas__mcp__UpdateStreamableHTTPMCPServer" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ], + "title": "Response Update Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["tools"], + "summary": "Delete Mcp Server From Config", + "description": "Delete a MCP server configuration", + "operationId": "delete_mcp_server", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Name" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ] + }, + "title": "Response Delete Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/test": 
{ + "post": { + "tags": ["tools"], + "summary": "Test Mcp Server", + "description": "Test connection to an MCP server without adding it.\nReturns the list of available tools if successful.", + "operationId": "test_mcp_server", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/servers/connect": { + "post": { + "tags": ["tools"], + "summary": "Connect Mcp Server", + "description": "Connect to an MCP server with support for OAuth via SSE.\nReturns a stream of events handling authorization state and exchange if OAuth is required.", + "operationId": "connect_mcp_server", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "$ref": "#/components/schemas/SSEServerConfig" + }, + { + "$ref": "#/components/schemas/StreamableHTTPServerConfig" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": {} + }, + "text/event-stream": { + "description": "Server-Sent Events stream" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + 
"/v1/tools/mcp/servers/{mcp_server_name}/tools/{tool_name}/execute": { + "post": { + "tags": ["tools"], + "summary": "Execute Mcp Tool", + "description": "Execute a specific MCP tool from a configured server.\nReturns the tool execution result.", + "operationId": "execute_mcp_tool", + "parameters": [ + { + "name": "mcp_server_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Name" + } + }, + { + "name": "tool_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Tool Name" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/letta__server__rest_api__routers__v1__tools__ToolExecuteRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tools/mcp/oauth/callback": { + "get": { + "tags": ["tools"], + "summary": "Mcp Oauth Callback", + "description": "Handle OAuth callback for MCP server authentication.\nSession is identified via the state parameter instead of URL path.", + "operationId": "mcp_oauth_callback", + "parameters": [ + { + "name": "code", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "OAuth authorization code", + "title": "Code" + }, + "description": "OAuth authorization code" + }, + { + "name": "state", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "OAuth state parameter", + "title": "State" + }, + "description": "OAuth state parameter" + }, + { + "name": "error", + "in": "query", + "required": false, + "schema": { + 
"anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "OAuth error", + "title": "Error" + }, + "description": "OAuth error" + }, + { + "name": "error_description", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "OAuth error description", + "title": "Error Description" + }, + "description": "OAuth error description" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/count": { + "get": { + "tags": ["sources"], + "summary": "Count Sources", + "description": "Count all data sources created by a user.", + "operationId": "count_sources", + "deprecated": true, + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Sources" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}": { + "get": { + "tags": ["sources"], + "summary": "Retrieve Source", + "description": "Get all sources", + "operationId": "retrieve_source", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the 
source in the format 'source-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Source" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["sources"], + "summary": "Modify Source", + "description": "Update the name or documentation of an existing data source.", + "operationId": "modify_source", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SourceUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Source" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["sources"], + "summary": "Delete Source", + "description": "Delete a data source.", + "operationId": "delete_source", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", 
+ "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/name/{source_name}": { + "get": { + "tags": ["sources"], + "summary": "Get Source Id By Name", + "description": "Get a source by name", + "operationId": "get_source_id_by_name", + "deprecated": true, + "parameters": [ + { + "name": "source_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Source Name" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "string", + "title": "Response Get Source Id By Name" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/metadata": { + "get": { + "tags": ["sources"], + "summary": "Get Sources Metadata", + "description": "Get aggregated metadata for all sources in an organization.\n\nReturns structured metadata including:\n- Total number of sources\n- Total number of files across all sources\n- Total size of all files\n- Per-source breakdown with file details (file_name, file_size per file) if include_detailed_per_source_metadata is True", + "operationId": "get_sources_metadata", + "deprecated": true, + "parameters": [ + { + "name": "include_detailed_per_source_metadata", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "default": false, + "title": "Include Detailed 
Per Source Metadata" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OrganizationSourcesStats" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/": { + "get": { + "tags": ["sources"], + "summary": "List Sources", + "description": "List all data sources created by a user.", + "operationId": "list_sources", + "deprecated": true, + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Source" + }, + "title": "Response List Sources" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["sources"], + "summary": "Create Source", + "description": "Create a new data source.", + "operationId": "create_source", + "deprecated": true, + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SourceCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Source" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/upload": { + "post": { + "tags": ["sources"], + "summary": "Upload File To Source", + "description": "Upload a file to a data source.", + "operationId": "upload_file_to_source", + "deprecated": true, + 
"parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "duplicate_handling", + "in": "query", + "required": false, + "schema": { + "$ref": "#/components/schemas/DuplicateFileHandling", + "description": "How to handle duplicate filenames", + "default": "suffix" + }, + "description": "How to handle duplicate filenames" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Optional custom name to override the uploaded file's name", + "title": "Name" + }, + "description": "Optional custom name to override the uploaded file's name" + } + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_upload_file_to_source" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileMetadata" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/agents": { + "get": { + "tags": ["sources"], + "summary": "Get Agents For Source", + "description": "Get all agent IDs that have the specified source attached.", + "operationId": "get_agents_for_source", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + 
"minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Response Get Agents For Source" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/passages": { + "get": { + "tags": ["sources"], + "summary": "List Source Passages", + "description": "List all passages associated with a data source.", + "operationId": "list_source_passages", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message after which to retrieve the returned messages.", + "title": "After" + }, + "description": "Message after which to retrieve the returned messages." 
+ }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message before which to retrieve the returned messages.", + "title": "Before" + }, + "description": "Message before which to retrieve the returned messages." + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "description": "Maximum number of messages to retrieve.", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to retrieve." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Passage" + }, + "title": "Response List Source Passages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/files": { + "get": { + "tags": ["sources"], + "summary": "List Source Files", + "description": "List paginated files associated with a data source.", + "operationId": "list_source_files", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "description": "Number of files to return", + "default": 1000, + "title": "Limit" + }, + "description": "Number of files to return" + }, + { + "name": 
"after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Pagination cursor to fetch the next set of results", + "title": "After" + }, + "description": "Pagination cursor to fetch the next set of results" + }, + { + "name": "include_content", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include full file content", + "default": false, + "title": "Include Content" + }, + "description": "Whether to include full file content" + }, + { + "name": "check_status_updates", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to check and update file processing status (from the vector db service). If False, will not fetch and update the status, which may lead to performance gains.", + "default": true, + "title": "Check Status Updates" + }, + "description": "Whether to check and update file processing status (from the vector db service). If False, will not fetch and update the status, which may lead to performance gains." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FileMetadata" + }, + "title": "Response List Source Files" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/files/{file_id}": { + "get": { + "tags": ["sources"], + "summary": "Get File Metadata", + "description": "Retrieve metadata for a specific file by its ID.", + "operationId": "get_file_metadata", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + "examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + }, + { + "name": "include_content", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include full file content", + "default": false, + "title": "Include Content" + }, + "description": "Whether to include full file content" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + 
"schema": { + "$ref": "#/components/schemas/FileMetadata" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/sources/{source_id}/{file_id}": { + "delete": { + "tags": ["sources"], + "summary": "Delete File From Source", + "description": "Delete a file from a data source.", + "operationId": "delete_file_from_source", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + "examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + } + ], + "responses": { + "204": { + "description": "Successful Response" + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/count": { + "get": { + "tags": ["folders"], + "summary": "Count Folders", + "description": "Count all data folders created by a user.", + "operationId": "count_folders", + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + 
"title": "Response Count Folders" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}": { + "get": { + "tags": ["folders"], + "summary": "Retrieve Folder", + "description": "Get a folder by ID", + "operationId": "retrieve_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Folder" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["folders"], + "summary": "Modify Folder", + "description": "Update the name or documentation of an existing data folder.", + "operationId": "modify_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/SourceUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Folder" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["folders"], + "summary": "Delete Folder", + "description": "Delete a data folder.", + "operationId": "delete_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/name/{folder_name}": { + "get": { + "tags": ["folders"], + "summary": "Get Folder By Name", + "description": "**Deprecated**: Please use the list endpoint `GET /v1/folders?name=` instead.\n\n\nGet a folder by name.", + "operationId": "get_folder_by_name", + "deprecated": true, + "parameters": [ + { + "name": "folder_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Folder Name" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "string", + "title": "Response Get Folder By Name" + } + } + } + }, + 
"422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/metadata": { + "get": { + "tags": ["folders"], + "summary": "Retrieve Metadata", + "description": "Get aggregated metadata for all folders in an organization.\n\nReturns structured metadata including:\n- Total number of folders\n- Total number of files across all folders\n- Total size of all files\n- Per-source breakdown with file details (file_name, file_size per file) if include_detailed_per_source_metadata is True", + "operationId": "retrieve_metadata", + "parameters": [ + { + "name": "include_detailed_per_source_metadata", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "default": false, + "title": "Include Detailed Per Source Metadata" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OrganizationSourcesStats" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/": { + "get": { + "tags": ["folders"], + "summary": "List Folders", + "description": "List all data folders created by a user.", + "operationId": "list_folders", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Folder ID cursor for pagination. Returns folders that come before this folder ID in the specified sort order", + "title": "Before" + }, + "description": "Folder ID cursor for pagination. 
Returns folders that come before this folder ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Folder ID cursor for pagination. Returns folders that come after this folder ID in the specified sort order", + "title": "After" + }, + "description": "Folder ID cursor for pagination. Returns folders that come after this folder ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of folders to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of folders to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for folders by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for folders by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Folder name to filter by", + "title": "Name" + }, + "description": "Folder name to filter by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Folder" + }, + "title": "Response List Folders" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["folders"], + "summary": "Create Folder", + "description": "Create a new data folder.", + "operationId": "create_folder", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SourceCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Folder" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/upload": { + "post": { + "tags": ["folders"], + "summary": "Upload File To Folder", + "description": "Upload a file to a data folder.", + "operationId": "upload_file_to_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": 
"string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "duplicate_handling", + "in": "query", + "required": false, + "schema": { + "$ref": "#/components/schemas/DuplicateFileHandling", + "description": "How to handle duplicate filenames", + "default": "suffix" + }, + "description": "How to handle duplicate filenames" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Optional custom name to override the uploaded file's name", + "title": "Name" + }, + "description": "Optional custom name to override the uploaded file's name" + } + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_upload_file_to_folder" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileMetadata" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/agents": { + "get": { + "tags": ["folders"], + "summary": "List Agents For Folder", + "description": "Get all agent IDs that have the specified folder attached.", + "operationId": "list_agents_for_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", 
+ "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order", + "title": "Before" + }, + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order", + "title": "After" + }, + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of agents to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of agents to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Response List Agents For Folder" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/passages": { + "get": { + "tags": ["folders"], + "summary": "List Folder Passages", + "description": "List all passages associated with a data folder.", + "operationId": "list_folder_passages", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Passage ID cursor for pagination. Returns passages that come before this passage ID in the specified sort order", + "title": "Before" + }, + "description": "Passage ID cursor for pagination. 
Returns passages that come before this passage ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Passage ID cursor for pagination. Returns passages that come after this passage ID in the specified sort order", + "title": "After" + }, + "description": "Passage ID cursor for pagination. Returns passages that come after this passage ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of passages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of passages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for passages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for passages by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Passage" + }, + "title": "Response List Folder Passages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/files": { + "get": { + "tags": ["folders"], + "summary": "List Files For Folder", + "description": "List paginated files associated with a data folder.", + "operationId": "list_files_for_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "File ID cursor for pagination. Returns files that come before this file ID in the specified sort order", + "title": "Before" + }, + "description": "File ID cursor for pagination. 
Returns files that come before this file ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "File ID cursor for pagination. Returns files that come after this file ID in the specified sort order", + "title": "After" + }, + "description": "File ID cursor for pagination. Returns files that come after this file ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of files to return", + "default": 1000, + "title": "Limit" + }, + "description": "Maximum number of files to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for files by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for files by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "include_content", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include full file content", + "default": false, + "title": "Include Content" + }, + "description": "Whether to include full file content" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FileMetadata" + }, + "title": "Response List Files For Folder" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/files/{file_id}": { + "get": { + "tags": ["folders"], + "summary": "Retrieve File", + "description": "Retrieve a file from a folder by ID.", + "operationId": "retrieve_file", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + 
"examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + }, + { + "name": "include_content", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include full file content", + "default": false, + "title": "Include Content" + }, + "description": "Whether to include full file content" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileMetadata" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/folders/{folder_id}/{file_id}": { + "delete": { + "tags": ["folders"], + "summary": "Delete File From Folder", + "description": "Delete a file from a folder.", + "operationId": "delete_file_from_folder", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + "examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + } + ], + "responses": { + "204": { + "description": "Successful Response" + }, + 
"422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/": { + "get": { + "tags": ["agents"], + "summary": "List Agents", + "description": "Get a list of all agents.", + "operationId": "list_agents", + "parameters": [ + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Name of the agent", + "title": "Name" + }, + "description": "Name of the agent" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "List of tags to filter agents by", + "title": "Tags" + }, + "description": "List of tags to filter agents by" + }, + { + "name": "match_all_tags", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, only returns agents that match ALL given tags. Otherwise, return agents that have ANY of the passed-in tags.", + "default": false, + "title": "Match All Tags" + }, + "description": "If True, only returns agents that match ALL given tags. Otherwise, return agents that have ANY of the passed-in tags." 
+ }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "Before" + }, + "description": "Cursor for pagination" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "After" + }, + "description": "Cursor for pagination" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Limit for pagination", + "default": 50, + "title": "Limit" + }, + "description": "Limit for pagination" + }, + { + "name": "query_text", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by name", + "title": "Query Text" + }, + "description": "Search agents by name" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by project ID - this will default to your default project on cloud", + "title": "Project Id" + }, + "description": "Search agents by project ID - this will default to your default project on cloud" + }, + { + "name": "template_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by template ID", + "title": "Template Id" + }, + "description": "Search agents by template ID" + }, + { + "name": "base_template_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by base template ID", + "title": "Base Template Id" + }, + "description": 
"Search agents by base template ID" + }, + { + "name": "identity_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by identity ID", + "title": "Identity Id" + }, + "description": "Search agents by identity ID" + }, + { + "name": "identifier_keys", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Search agents by identifier keys", + "title": "Identifier Keys" + }, + "description": "Search agents by identifier keys" + }, + { + "name": "include_relationships", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true, + "title": "Include Relationships" + }, + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "enum": [ + "agent.blocks", + "agent.identities", + "agent.managed_group", + "agent.pending_approval", + "agent.secrets", + "agent.sources", + "agent.tags", + "agent.tools" + ], + "type": "string" + }, + "description": "Specify which relational fields to include in the response. 
No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "enum": ["created_at", "last_run_completion"], + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default)", + "deprecated": true, + "default": false, + "title": "Ascending" + }, + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default)", + "deprecated": true + }, + { + "name": "sort_by", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Field to sort by. Options: 'created_at' (default), 'last_run_completion'", + "deprecated": true, + "default": "created_at", + "title": "Sort By" + }, + "description": "Field to sort by. 
Options: 'created_at' (default), 'last_run_completion'", + "deprecated": true + }, + { + "name": "last_stop_reason", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "Filter agents by their last stop reason.", + "title": "Last Stop Reason" + }, + "description": "Filter agents by their last stop reason." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + }, + "title": "Response List Agents" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["agents"], + "summary": "Create Agent", + "description": "Create an agent.", + "operationId": "create_agent", + "parameters": [ + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The project slug to associate with the agent (cloud only).", + "title": "X-Project" + }, + "description": "The project slug to associate with the agent (cloud only)." 
+ } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAgentRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/count": { + "get": { + "tags": ["agents"], + "summary": "Count Agents", + "description": "Get the total number of agents with optional filtering.\nSupports the same filters as list_agents for consistent querying.", + "operationId": "count_agents", + "parameters": [ + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Name of the agent", + "title": "Name" + }, + "description": "Name of the agent" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "List of tags to filter agents by", + "title": "Tags" + }, + "description": "List of tags to filter agents by" + }, + { + "name": "match_all_tags", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, only counts agents that match ALL given tags. Otherwise, counts agents that have ANY of the passed-in tags.", + "default": false, + "title": "Match All Tags" + }, + "description": "If True, only counts agents that match ALL given tags. Otherwise, counts agents that have ANY of the passed-in tags." 
+ }, + { + "name": "query_text", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by name", + "title": "Query Text" + }, + "description": "Search agents by name" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by project ID - this will default to your default project on cloud", + "title": "Project Id" + }, + "description": "Search agents by project ID - this will default to your default project on cloud" + }, + { + "name": "template_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by template ID", + "title": "Template Id" + }, + "description": "Search agents by template ID" + }, + { + "name": "base_template_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by base template ID", + "title": "Base Template Id" + }, + "description": "Search agents by base template ID" + }, + { + "name": "identity_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search agents by identity ID", + "title": "Identity Id" + }, + "description": "Search agents by identity ID" + }, + { + "name": "identifier_keys", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Search agents by identifier keys", + "title": "Identifier Keys" + }, + "description": "Search agents by identifier keys" + }, + { + "name": "last_stop_reason", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": 
"#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "Filter agents by their last stop reason.", + "title": "Last Stop Reason" + }, + "description": "Filter agents by their last stop reason." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Agents" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/export": { + "get": { + "tags": ["agents"], + "summary": "Export Agent", + "description": "Export the serialized JSON representation of an agent, formatted with indentation.", + "operationId": "export_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Agent Id" + } + }, + { + "name": "max_steps", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "deprecated": true, + "default": 100, + "title": "Max Steps" + }, + "deprecated": true + }, + { + "name": "use_legacy_format", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, exports using the legacy single-agent 'v1' format with inline tools/blocks. If False, exports using the new multi-entity 'v2' format, with separate agents, tools, blocks, files, etc.", + "deprecated": true, + "default": false, + "title": "Use Legacy Format" + }, + "description": "If True, exports using the legacy single-agent 'v1' format with inline tools/blocks. 
If False, exports using the new multi-entity 'v2' format, with separate agents, tools, blocks, files, etc.", + "deprecated": true + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Conversation ID to export. If provided, uses messages from this conversation instead of the agent's global message history.", + "title": "Conversation Id" + }, + "description": "Conversation ID to export. If provided, uses messages from this conversation instead of the agent's global message history." + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Body_export_agent" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/import": { + "post": { + "tags": ["agents"], + "summary": "Import Agent", + "description": "Import a serialized agent file and recreate the agent(s) in the system.\nReturns the IDs of all imported agents.", + "operationId": "import_agent", + "parameters": [ + { + "name": "x-override-embedding-model", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Override-Embedding-Model" + } + } + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_import_agent" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportedAgentsResponse" + } + } + } + }, + "422": { + "description": "Validation Error", 
+ "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/context": { + "get": { + "tags": ["agents"], + "summary": "Retrieve Agent Context Window", + "description": "Retrieve the context window of a specific agent.", + "operationId": "retrieve_agent_context_window", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Conversation ID to get context window for. If provided, uses messages from this conversation.", + "title": "Conversation Id" + }, + "description": "Conversation ID to get context window for. If provided, uses messages from this conversation." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ContextWindowOverview" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}": { + "patch": { + "tags": ["agents"], + "summary": "Modify Agent", + "description": "Update an existing agent.", + "operationId": "modify_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateAgent" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["agents"], + "summary": "Retrieve Agent", + "description": "Get the state of the agent.", + "operationId": "retrieve_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + 
"examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "include_relationships", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true, + "title": "Include Relationships" + }, + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "enum": [ + "agent.blocks", + "agent.identities", + "agent.managed_group", + "agent.pending_approval", + "agent.secrets", + "agent.sources", + "agent.tags", + "agent.tools" + ], + "type": "string" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["agents"], + "summary": "Delete Agent", + "description": "Delete an agent.", + "operationId": "delete_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/tools": { + "get": { + "tags": ["agents"], + "summary": "List Tools For Agent", + "description": "Get tools from an existing agent.", + "operationId": "list_tools_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + 
"required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tool ID cursor for pagination. Returns tools that come before this tool ID in the specified sort order", + "title": "Before" + }, + "description": "Tool ID cursor for pagination. Returns tools that come before this tool ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tool ID cursor for pagination. Returns tools that come after this tool ID in the specified sort order", + "title": "After" + }, + "description": "Tool ID cursor for pagination. Returns tools that come after this tool ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of tools to return", + "default": 10, + "title": "Limit" + }, + "description": "Maximum number of tools to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for tools by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for tools by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "title": "Response List Tools For Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/tools/attach/{tool_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Tool To Agent", + "description": "Attach a tool to an agent.", + "operationId": "attach_tool_to_agent", + "parameters": [ + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^tool-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the tool in the format 'tool-'", + "examples": ["tool-123e4567-e89b-42d3-8456-426614174000"], + "title": "Tool Id" + }, + "description": "The ID of the tool in the format 'tool-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": 
[ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Attach Tool To Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/tools/detach/{tool_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Tool From Agent", + "description": "Detach a tool from an agent.", + "operationId": "detach_tool_from_agent", + "parameters": [ + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^tool-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the tool in the format 'tool-'", + "examples": ["tool-123e4567-e89b-42d3-8456-426614174000"], + "title": "Tool Id" + }, + "description": "The ID of the tool in the format 'tool-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Detach Tool From Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/tools/approval/{tool_name}": { + "patch": { + "tags": 
["agents"], + "summary": "Modify Approval For Tool", + "description": "Modify the approval requirement for a tool attached to an agent.\n\nAccepts requires_approval via request body (preferred) or query parameter (deprecated).", + "operationId": "modify_approval_for_tool", + "parameters": [ + { + "name": "tool_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Tool Name" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "requires_approval", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Whether the tool requires approval before execution", + "deprecated": true, + "title": "Requires Approval" + }, + "description": "Whether the tool requires approval before execution", + "deprecated": true + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/ModifyApprovalRequest" + }, + { + "type": "null" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Modify Approval For Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/tools/{tool_name}/run": { + "post": 
{ + "tags": ["agents"], + "summary": "Run Tool For Agent", + "description": "Trigger a tool by name on a specific agent, providing the necessary arguments.\n\nThis endpoint executes a tool that is attached to the agent, using the agent's\nstate and environment variables for execution context.", + "operationId": "run_tool_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "tool_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Tool Name" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/letta__schemas__mcp_server__ToolExecuteRequest", + "default": { + "args": {} + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolExecutionResult" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/sources/attach/{source_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Source", + "description": "Attach a source to an agent.", + "operationId": "attach_source_to_agent", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + 
"description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/folders/attach/{folder_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Folder To Agent", + "description": "Attach a folder to an agent.", + "operationId": "attach_folder_to_agent", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in 
the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Attach Folder To Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/sources/detach/{source_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Source", + "description": "Detach a source from an agent.", + "operationId": "detach_source_from_agent", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Source Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + 
"description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/folders/detach/{folder_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Folder From Agent", + "description": "Detach a folder from an agent.", + "operationId": "detach_folder_from_agent", + "parameters": [ + { + "name": "folder_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 43, + "maxLength": 43, + "pattern": "^source-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the source in the format 'source-'", + "examples": ["source-123e4567-e89b-42d3-8456-426614174000"], + "title": "Folder Id" + }, + "description": "The ID of the source in the format 'source-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Detach Folder From Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/files/close-all": { + "patch": { + "tags": ["agents"], + "summary": "Close All Files For Agent", + "description": "Closes all currently open files for a given agent.\n\nThis endpoint updates 
the file state for the agent so that no files are marked as open.\nTypically used to reset the working memory view for the agent.", + "operationId": "close_all_files_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Response Close All Files For Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/files/{file_id}/open": { + "patch": { + "tags": ["agents"], + "summary": "Open File For Agent", + "description": "Opens a specific file for a given agent.\n\nThis endpoint marks a specific file as open in the agent's file state.\nThe file will be included in the agent's working memory view.\nReturns a list of file names that were closed due to LRU eviction.", + "operationId": "open_file_for_agent", + "parameters": [ + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + "examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + }, + { + "name": "agent_id", + "in": "path", + 
"required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Response Open File For Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/files/{file_id}/close": { + "patch": { + "tags": ["agents"], + "summary": "Close File For Agent", + "description": "Closes a specific file for a given agent.\n\nThis endpoint marks a specific file as closed in the agent's file state.\nThe file will be removed from the agent's working memory view.", + "operationId": "close_file_for_agent", + "parameters": [ + { + "name": "file_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^file-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the file in the format 'file-'", + "examples": ["file-123e4567-e89b-42d3-8456-426614174000"], + "title": "File Id" + }, + "description": "The ID of the file in the format 'file-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + 
"title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/sources": { + "get": { + "tags": ["agents"], + "summary": "List Agent Sources", + "description": "Get the sources associated with an agent.", + "operationId": "list_agent_sources", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order", + "title": "Before" + }, + "description": "Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Source ID cursor for pagination. Returns sources that come after this source ID in the specified sort order", + "title": "After" + }, + "description": "Source ID cursor for pagination. 
Returns sources that come after this source ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of sources to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of sources to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for sources by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for sources by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Source" + }, + "title": "Response List Agent Sources" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/folders": { + "get": { + "tags": ["agents"], + "summary": "List Folders For Agent", + "description": "Get the folders associated with an agent.", + "operationId": "list_folders_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + 
"examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order", + "title": "Before" + }, + "description": "Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Source ID cursor for pagination. Returns sources that come after this source ID in the specified sort order", + "title": "After" + }, + "description": "Source ID cursor for pagination. Returns sources that come after this source ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of sources to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of sources to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for sources by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for sources by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Source" + }, + "title": "Response List Folders For Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/files": { + "get": { + "tags": ["agents"], + "summary": "List Files For Agent", + "description": "Get the files attached to an agent with their open/closed status.", + "operationId": "list_files_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "File ID cursor for pagination. Returns files that come before this file ID in the specified sort order", + "title": "Before" + }, + "description": "File ID cursor for pagination. 
Returns files that come before this file ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "File ID cursor for pagination. Returns files that come after this file ID in the specified sort order", + "title": "After" + }, + "description": "File ID cursor for pagination. Returns files that come after this file ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of files to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of files to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for files by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for files by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "cursor", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Pagination cursor from previous response (deprecated, use before/after)", + "deprecated": true, + "title": "Cursor" + }, + "description": "Pagination cursor from previous response (deprecated, use before/after)", + "deprecated": true + }, + { + "name": "is_open", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Filter by open status (true for open files, false for closed files)", + "title": "Is Open" + }, + "description": "Filter by open status (true for open files, false for closed files)" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PaginatedAgentFiles" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory": { + "get": { + "tags": ["agents"], + "summary": "Retrieve Agent Memory", + "description": "Retrieve the memory state of a specific agent.\nThis endpoint fetches the current memory state of the agent identified by the user ID and agent ID.", + "operationId": "retrieve_agent_memory", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": 
"^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Memory" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory/blocks/{block_label}": { + "get": { + "tags": ["agents"], + "summary": "Retrieve Block For Agent", + "description": "Retrieve a core memory block from an agent.", + "operationId": "retrieve_core_memory_block", + "parameters": [ + { + "name": "block_label", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Block Label" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["agents"], + "summary": "Modify Block For Agent", + "description": "Updates a core memory block of an 
agent.", + "operationId": "modify_core_memory_block", + "parameters": [ + { + "name": "block_label", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Block Label" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory/blocks": { + "get": { + "tags": ["agents"], + "summary": "List Blocks For Agent", + "description": "Retrieve the core memory blocks of a specific agent.", + "operationId": "list_core_memory_blocks", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order", + "title": "Before" + }, + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order", + "title": "After" + }, + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of blocks to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of blocks to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for blocks by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for blocks by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockResponse" + }, + "title": "Response List Core Memory Blocks" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory/blocks/attach/{block_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Block To Agent", + "description": "Attach a core memory block to an agent.", + "operationId": "attach_core_memory_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory/blocks/detach/{block_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Block From Agent", + "description": "Detach a core memory block from an agent.", + "operationId": "detach_core_memory_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/archives/attach/{archive_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Archive To Agent", + "description": "Attach an archive to an 
agent.", + "operationId": "attach_archive_to_agent", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Archive Id" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/archives/detach/{archive_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Archive From Agent", + "description": "Detach an archive from an agent.", + "operationId": "detach_archive_from_agent", + "parameters": [ + { + "name": "archive_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Archive Id" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { 
+ "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/identities/attach/{identity_id}": { + "patch": { + "tags": ["agents"], + "summary": "Attach Identity To Agent", + "description": "Attach an identity to an agent.", + "operationId": "attach_identity_to_agent", + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Identity Id" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/identities/detach/{identity_id}": { + "patch": { + "tags": ["agents"], + "summary": "Detach Identity From Agent", + "description": "Detach an identity from an agent.", + "operationId": "detach_identity_from_agent", + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Identity Id" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": 
["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/archival-memory": { + "get": { + "tags": ["agents"], + "summary": "List Passages", + "description": "Retrieve the memories in an agent's archival memory store (paginated query).", + "operationId": "list_passages", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Unique ID of the memory to start the query range at.", + "title": "After" + }, + "description": "Unique ID of the memory to start the query range at." + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Unique ID of the memory to end the query range at.", + "title": "Before" + }, + "description": "Unique ID of the memory to end the query range at." 
+ }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "How many results to include in the response.", + "default": 100, + "title": "Limit" + }, + "description": "How many results to include in the response." + }, + { + "name": "search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search passages by text", + "title": "Search" + }, + "description": "Search passages by text" + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Whether to sort passages oldest to newest (True, default) or newest to oldest (False)", + "default": true, + "title": "Ascending" + }, + "description": "Whether to sort passages oldest to newest (True, default) or newest to oldest (False)" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Passage" + }, + "title": "Response List Passages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["agents"], + "summary": "Create Passage", + "description": "Insert a memory into an agent's archival memory store.", + "operationId": "create_passage", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": 
"Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateArchivalMemory" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Passage" + }, + "title": "Response Create Passage" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/archival-memory/search": { + "get": { + "tags": ["agents"], + "summary": "Search Archival Memory", + "description": "Search archival memory using semantic (embedding-based) search with optional temporal filtering.\n\nThis endpoint allows manual triggering of archival memory searches, enabling users to query\nan agent's archival memory store directly via the API. 
The search uses the same functionality\nas the agent's archival_memory_search tool but is accessible for external API usage.", + "operationId": "search_archival_memory", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "query", + "in": "query", + "required": true, + "schema": { + "type": "string", + "description": "String to search for using semantic similarity", + "title": "Query" + }, + "description": "String to search for using semantic similarity" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Optional list of tags to filter search results", + "title": "Tags" + }, + "description": "Optional list of tags to filter search results" + }, + { + "name": "tag_match_mode", + "in": "query", + "required": false, + "schema": { + "enum": ["any", "all"], + "type": "string", + "description": "How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags", + "default": "any", + "title": "Tag Match Mode" + }, + "description": "How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags" + }, + { + "name": "top_k", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of results to return. Uses system default if not specified", + "title": "Top K" + }, + "description": "Maximum number of results to return. 
Uses system default if not specified" + }, + { + "name": "start_datetime", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "description": "Filter results to passages created after this datetime", + "title": "Start Datetime" + }, + "description": "Filter results to passages created after this datetime" + }, + { + "name": "end_datetime", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "description": "Filter results to passages created before this datetime", + "title": "End Datetime" + }, + "description": "Filter results to passages created before this datetime" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArchivalMemorySearchResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/archival-memory/{memory_id}": { + "delete": { + "tags": ["agents"], + "summary": "Delete Passage", + "description": "Delete a memory from an agent's archival memory store.", + "operationId": "delete_passage", + "parameters": [ + { + "name": "memory_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Memory Id" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + 
"responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages": { + "get": { + "tags": ["agents"], + "summary": "List Messages", + "description": "Retrieve message history for an agent.", + "operationId": "list_messages", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. 
Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "group_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID to filter messages by.", + "title": "Group Id" + }, + "description": "Group ID to filter messages by." + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Conversation ID to filter messages by.", + "title": "Conversation Id" + }, + "description": "Conversation ID to filter messages by." 
+ }, + { + "name": "use_assistant_message", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to use assistant messages", + "deprecated": true, + "default": true, + "title": "Use Assistant Message" + }, + "description": "Whether to use assistant messages", + "deprecated": true + }, + { + "name": "assistant_message_tool_name", + "in": "query", + "required": false, + "schema": { + "type": "string", + "description": "The name of the designated message tool.", + "deprecated": true, + "default": "send_message", + "title": "Assistant Message Tool Name" + }, + "description": "The name of the designated message tool.", + "deprecated": true + }, + { + "name": "assistant_message_tool_kwarg", + "in": "query", + "required": false, + "schema": { + "type": "string", + "description": "The name of the message argument.", + "deprecated": true, + "default": "message", + "title": "Assistant Message Tool Kwarg" + }, + "description": "The name of the message argument.", + "deprecated": true + }, + { + "name": "include_err", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Whether to include error messages and error statuses. For debugging purposes only.", + "title": "Include Err" + }, + "description": "Whether to include error messages and error statuses. For debugging purposes only." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response List Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["agents"], + "summary": "Send Message", + "description": "Process a user message and return the agent's response.\nThis endpoint accepts a message from a user and processes it through the agent.\n\nThe response format is controlled by the `streaming` field in the request body:\n- If `streaming=false` (default): Returns a complete LettaResponse with all messages\n- If `streaming=true`: Returns a Server-Sent Events (SSE) stream\n\nAdditional streaming options (only used when streaming=true):\n- `stream_tokens`: Stream individual tokens instead of complete steps\n- `include_pings`: Include keepalive pings to prevent connection timeouts\n- `background`: Process the request in the background", + "operationId": "send_message", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaStreamingRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/LettaResponse" + } + }, + "text/event-stream": { + "description": "Server-Sent Events stream (when streaming=true in request body)" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages/{message_id}": { + "patch": { + "tags": ["agents"], + "summary": "Modify Message", + "description": "Update the details of a message associated with an agent.", + "operationId": "modify_message", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "message_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^message-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the message in the format 'message-'", + "examples": ["message-123e4567-e89b-42d3-8456-426614174000"], + "title": "Message Id" + }, + "description": "The ID of the message in the format 'message-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdateSystemMessage" + }, + { + "$ref": "#/components/schemas/UpdateUserMessage" + }, + { + "$ref": "#/components/schemas/UpdateReasoningMessage" + }, + { + "$ref": "#/components/schemas/UpdateAssistantMessage" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + 
"application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/SummaryMessage" + }, + { + "$ref": "#/components/schemas/EventMessage" + } + ], + "discriminator": { + "propertyName": "message_type", + "mapping": { + "system_message": "#/components/schemas/SystemMessage", + "user_message": "#/components/schemas/UserMessage", + "reasoning_message": "#/components/schemas/ReasoningMessage", + "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage", + "tool_call_message": "#/components/schemas/ToolCallMessage", + "tool_return_message": "#/components/schemas/ToolReturnMessage", + "assistant_message": "#/components/schemas/AssistantMessage", + "approval_request_message": "#/components/schemas/ApprovalRequestMessage", + "approval_response_message": "#/components/schemas/ApprovalResponseMessage", + "summary": "#/components/schemas/SummaryMessage", + "event": "#/components/schemas/EventMessage" + } + }, + "title": "Response Modify Message" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages/stream": { + "post": { + "tags": ["agents"], + "summary": "Send Message Streaming", + "description": "Process a user message and return the agent's response.\n\nDeprecated: Use the `POST /{agent_id}/messages` endpoint with `streaming=true` 
in the request body instead.\n\nThis endpoint accepts a message from a user and processes it through the agent.\nIt will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.", + "operationId": "create_agent_message_stream", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaStreamingRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaStreamingResponse" + } + }, + "text/event-stream": { + "description": "Server-Sent Events stream" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages/cancel": { + "post": { + "tags": ["agents"], + "summary": "Cancel Message", + "description": "Cancel runs associated with an agent. 
If run_ids are passed in, cancel those in particular.\n\nNote to cancel active runs associated with an agent, redis is required.", + "operationId": "cancel_message", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CancelAgentRunRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "title": "Response Cancel Message" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/messages/search": { + "post": { + "tags": ["agents"], + "summary": "Search Messages", + "description": "Search messages across the entire organization with optional project and template filtering. 
Returns messages with FTS/vector ranks and total RRF score.\n\nThis is a cloud-only feature.", + "operationId": "search_messages", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MessageSearchRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MessageSearchResult" + }, + "title": "Response Search Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages/async": { + "post": { + "tags": ["agents"], + "summary": "Send Message Async", + "description": "Asynchronously process a user message and return a run object.\nThe actual processing happens in the background, and the status can be checked using the run ID.\n\nThis is \"asynchronous\" in the sense that it's a background run and explicitly must be fetched by the run ID.", + "operationId": "create_agent_message_async", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaAsyncRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/Run" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/reset-messages": { + "patch": { + "tags": ["agents"], + "summary": "Reset Messages", + "description": "Resets the messages for an agent", + "operationId": "reset_messages", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ResetMessagesRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "title": "Response Reset Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/groups": { + "get": { + "tags": ["agents"], + "summary": "List Groups For Agent", + "description": "Lists the groups for an agent.", + "operationId": "list_groups_for_agent", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the 
format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + }, + { + "name": "manager_type", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Manager type to filter groups by", + "title": "Manager Type" + }, + "description": "Manager type to filter groups by" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID cursor for pagination. Returns groups that come before this group ID in the specified sort order", + "title": "Before" + }, + "description": "Group ID cursor for pagination. Returns groups that come before this group ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID cursor for pagination. Returns groups that come after this group ID in the specified sort order", + "title": "After" + }, + "description": "Group ID cursor for pagination. Returns groups that come after this group ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of groups to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of groups to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for groups by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for groups by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Group" + }, + "title": "Response List Groups For Agent" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/messages/preview-raw-payload": { + "post": { + "tags": ["agents"], + "summary": "Preview Model Request", + "description": "Inspect the raw LLM request payload without sending it.\n\nThis endpoint processes the message through the agent loop up until\nthe LLM request, then returns the raw request payload that would\nbe sent to the LLM provider. 
Useful for debugging and inspection.", + "operationId": "preview_model_request", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/LettaRequest" + }, + { + "$ref": "#/components/schemas/LettaStreamingRequest" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "title": "Response Preview Model Request" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/summarize": { + "post": { + "tags": ["agents"], + "summary": "Summarize Messages", + "description": "Summarize an agent's conversation history.", + "operationId": "summarize_messages", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + 
"$ref": "#/components/schemas/CompactionRequest" + }, + { + "type": "null" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CompactionResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/": { + "post": { + "tags": ["conversations"], + "summary": "Create Conversation", + "description": "Create a new conversation for an agent.", + "operationId": "create_conversation", + "parameters": [ + { + "name": "agent_id", + "in": "query", + "required": true, + "schema": { + "type": "string", + "description": "The agent ID to create a conversation for", + "title": "Agent Id" + }, + "description": "The agent ID to create a conversation for" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateConversation" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Conversation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["conversations"], + "summary": "List Conversations", + "description": "List all conversations for an agent.", + "operationId": "list_conversations", + "parameters": [ + { + "name": "agent_id", + "in": "query", + "required": true, + "schema": { + "type": "string", + "description": "The agent ID to list conversations for", + "title": "Agent Id" + }, + "description": "The agent ID to list conversations for" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": 
"integer", + "description": "Maximum number of conversations to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of conversations to return" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination (conversation ID)", + "title": "After" + }, + "description": "Cursor for pagination (conversation ID)" + }, + { + "name": "summary_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search for text within conversation summaries", + "title": "Summary Search" + }, + "description": "Search for text within conversation summaries" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Conversation" + }, + "title": "Response List Conversations" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/{conversation_id}": { + "get": { + "tags": ["conversations"], + "summary": "Retrieve Conversation", + "description": "Retrieve a specific conversation.", + "operationId": "retrieve_conversation", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. 
Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Conversation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["conversations"], + "summary": "Update Conversation", + "description": "Update a conversation.", + "operationId": "update_conversation", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateConversation" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Conversation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/{conversation_id}/messages": { + "get": { + "tags": ["conversations"], + "summary": "List Conversation Messages", + "description": "List all messages in a conversation.\n\nReturns LettaMessage objects (UserMessage, AssistantMessage, etc.) 
for all\nmessages in the conversation, with support for cursor-based pagination.", + "operationId": "list_conversation_messages", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. 
Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "group_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID to filter messages by.", + "title": "Group Id" + }, + "description": "Group ID to filter messages by." + }, + { + "name": "include_err", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Whether to include error messages and error statuses. For debugging purposes only.", + "title": "Include Err" + }, + "description": "Whether to include error messages and error statuses. For debugging purposes only." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response List Conversation Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["conversations"], + "summary": "Send Conversation Message", + "description": "Send a message to a conversation and get a response.\n\nThis endpoint sends a message to an existing conversation.\nBy default (streaming=true), returns a streaming response (Server-Sent Events).\nSet streaming=false to get a complete JSON response.", + "operationId": "send_conversation_message", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. 
Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ConversationMessageRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaResponse" + }, + "description": "JSON response (when streaming=false)" + }, + "text/event-stream": { + "description": "Server-Sent Events stream (default, when streaming=true)" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/{conversation_id}/stream": { + "post": { + "tags": ["conversations"], + "summary": "Retrieve Conversation Stream", + "description": "Resume the stream for the most recent active run in a conversation.\n\nThis endpoint allows you to reconnect to an active background stream\nfor a conversation, enabling recovery from network interruptions.", + "operationId": "retrieve_conversation_stream", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. 
Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RetrieveStreamRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": {} + }, + "text/event-stream": { + "description": "Server-Sent Events stream", + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/LettaPing" + }, + { + "$ref": "#/components/schemas/LettaErrorMessage" + }, + { + "$ref": "#/components/schemas/LettaStopReason" + }, + { + "$ref": "#/components/schemas/LettaUsageStatistics" + } + ] + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/{conversation_id}/cancel": { + "post": { + "tags": ["conversations"], + "summary": "Cancel Conversation", + "description": "Cancel runs associated with a conversation.\n\nNote: To cancel active runs, Redis is required.", + "operationId": "cancel_conversation", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation 
identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "title": "Response Cancel Conversation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/conversations/{conversation_id}/compact": { + "post": { + "tags": ["conversations"], + "summary": "Compact Conversation", + "description": "Compact (summarize) a conversation's message history.\n\nThis endpoint summarizes the in-context messages for a specific conversation,\nreducing the message count while preserving important context.", + "operationId": "compact_conversation", + "parameters": [ + { + "name": "conversation_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 1, + "maxLength": 41, + "pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$", + "description": "The conversation identifier. Either the special value 'default' or an ID in the format 'conv-'", + "examples": [ + "default", + "conv-123e4567-e89b-42d3-8456-426614174000" + ], + "title": "Conversation Id" + }, + "description": "The conversation identifier. 
Either the special value 'default' or an ID in the format 'conv-'" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionRequest" + }, + { + "type": "null" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CompactionResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/chat/completions": { + "post": { + "tags": ["chat"], + "summary": "Create Chat Completion", + "description": "Create a chat completion using a Letta agent (OpenAI-compatible).\n\nThis endpoint provides full OpenAI API compatibility. The agent is selected based on:\n- The 'model' parameter in the request (should contain an agent ID in format 'agent-...')\n\nWhen streaming is enabled (stream=true), the response will be Server-Sent Events\nwith ChatCompletionChunk objects.", + "operationId": "create_chat_completion", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatCompletionRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatCompletion" + } + }, + "text/event-stream": { + "description": "Server-Sent Events stream (when stream=true)" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/": { + "get": { + "tags": ["groups"], + "summary": "List Groups", + "description": "Fetch all multi-agent groups matching query.", + "operationId": 
"list_groups", + "deprecated": true, + "parameters": [ + { + "name": "manager_type", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/ManagerType" + }, + { + "type": "null" + } + ], + "description": "Search groups by manager type", + "title": "Manager Type" + }, + "description": "Search groups by manager type" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID cursor for pagination. Returns groups that come before this group ID in the specified sort order", + "title": "Before" + }, + "description": "Group ID cursor for pagination. Returns groups that come before this group ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Group ID cursor for pagination. Returns groups that come after this group ID in the specified sort order", + "title": "After" + }, + "description": "Group ID cursor for pagination. Returns groups that come after this group ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of groups to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of groups to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for groups by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for groups by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search groups by project id", + "title": "Project Id" + }, + "description": "Search groups by project id" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Group" + }, + "title": "Response List Groups" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["groups"], + "summary": "Create Group", + "description": "Create a new multi-agent group with the specified configuration.", + "operationId": "create_group", + "deprecated": true, + "parameters": [ + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The project slug to associate with the group (cloud only).", + "title": "X-Project" + }, + "description": "The project slug to associate with the group (cloud only)." 
+ } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Group" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/count": { + "get": { + "tags": ["groups"], + "summary": "Count Groups", + "description": "Get the count of all groups associated with a given user.", + "operationId": "count_groups", + "deprecated": true, + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Groups" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}": { + "get": { + "tags": ["groups"], + "summary": "Retrieve Group", + "description": "Retrieve the group by id.", + "operationId": "retrieve_group", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Group" + } + } + } + }, + "422": { + 
"description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["groups"], + "summary": "Modify Group", + "description": "Create a new multi-agent group with the specified configuration.", + "operationId": "modify_group", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + }, + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The project slug to associate with the group (cloud only).", + "title": "X-Project" + }, + "description": "The project slug to associate with the group (cloud only)." 
+ } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Group" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["groups"], + "summary": "Delete Group", + "description": "Delete a multi-agent group.", + "operationId": "delete_group", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/messages": { + "post": { + "tags": ["groups"], + "summary": "Send Group Message", + "description": "Process a user message and return the group's response.\nThis endpoint accepts a message from a user and processes it through agents in the group based on the specified pattern", + "operationId": "send_group_message", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + 
"pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["groups"], + "summary": "List Group Messages", + "description": "Retrieve message history for an agent.", + "operationId": "list_group_messages", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. 
Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to retrieve", + "default": 10, + "title": "Limit" + }, + "description": "Maximum number of messages to retrieve" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "use_assistant_message", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to use assistant messages", + "deprecated": true, + "default": true, + "title": "Use Assistant Message" + }, + "description": "Whether to use assistant messages", + "deprecated": true + }, + { + "name": "assistant_message_tool_name", + "in": "query", + "required": false, + "schema": { + "type": "string", + "description": "The name of the designated message tool.", + "deprecated": true, + "default": "send_message", + "title": "Assistant Message Tool Name" + }, + "description": "The name of the designated message tool.", + "deprecated": true + }, + { + "name": "assistant_message_tool_kwarg", + "in": "query", + "required": false, + "schema": { + "type": "string", + "description": "The name of the message argument.", + "deprecated": true, + "default": "message", + "title": "Assistant Message Tool Kwarg" + }, + "description": "The name of the message argument.", + "deprecated": true + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response List Group Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/messages/stream": { + "post": { + "tags": ["groups"], + "summary": "Send Group Message Streaming", + "description": "Process a user message and return the group's 
responses.\nThis endpoint accepts a message from a user and processes it through agents in the group based on the specified pattern.\nIt will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.", + "operationId": "send_group_message_streaming", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaStreamingRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": {} + }, + "text/event-stream": { + "description": "Server-Sent Events stream" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/messages/{message_id}": { + "patch": { + "tags": ["groups"], + "summary": "Modify Group Message", + "description": "Update the details of a message associated with an agent.", + "operationId": "modify_group_message", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + 
"description": "The ID of the group in the format 'group-'" + }, + { + "name": "message_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": "^message-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the message in the format 'message-'", + "examples": ["message-123e4567-e89b-42d3-8456-426614174000"], + "title": "Message Id" + }, + "description": "The ID of the message in the format 'message-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/UpdateSystemMessage" + }, + { + "$ref": "#/components/schemas/UpdateUserMessage" + }, + { + "$ref": "#/components/schemas/UpdateReasoningMessage" + }, + { + "$ref": "#/components/schemas/UpdateAssistantMessage" + } + ], + "title": "Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/SummaryMessage" + }, + { + "$ref": "#/components/schemas/EventMessage" + } + ], + "discriminator": { + "propertyName": "message_type", + "mapping": { + "system_message": "#/components/schemas/SystemMessage", + "user_message": "#/components/schemas/UserMessage", + "reasoning_message": "#/components/schemas/ReasoningMessage", + "hidden_reasoning_message": 
"#/components/schemas/HiddenReasoningMessage", + "tool_call_message": "#/components/schemas/ToolCallMessage", + "tool_return_message": "#/components/schemas/ToolReturnMessage", + "assistant_message": "#/components/schemas/AssistantMessage", + "approval_request_message": "#/components/schemas/ApprovalRequestMessage", + "approval_response_message": "#/components/schemas/ApprovalResponseMessage", + "summary": "#/components/schemas/SummaryMessage", + "event": "#/components/schemas/EventMessage" + } + }, + "title": "Response Modify Group Message" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/reset-messages": { + "patch": { + "tags": ["groups"], + "summary": "Reset Group Messages", + "description": "Delete the group messages for all agents that are part of the multi-agent group.", + "operationId": "reset_group_messages", + "deprecated": true, + "parameters": [ + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/blocks/attach/{block_id}": { + "patch": { + "tags": ["groups"], + "summary": "Attach Block To Group", + "description": "Attach a block to a group.\nThis will add 
the block to the group and all agents within the group.", + "operationId": "attach_block_to_group", + "deprecated": true, + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Block Id" + } + }, + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/groups/{group_id}/blocks/detach/{block_id}": { + "patch": { + "tags": ["groups"], + "summary": "Detach Block From Group", + "description": "Detach a block from a group.\nThis will remove the block from the group and all agents within the group.", + "operationId": "detach_block_from_group", + "deprecated": true, + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Block Id" + } + }, + { + "name": "group_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the group in the format 'group-'", + "examples": ["group-123e4567-e89b-42d3-8456-426614174000"], + "title": "Group Id" + }, + "description": "The ID of the group in the format 'group-'" + } + ], + "responses": { + "200": { + "description": 
"Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/": { + "get": { + "tags": ["identities", "identities"], + "summary": "List Identities", + "description": "Get a list of all identities in the database", + "operationId": "list_identities", + "deprecated": true, + "parameters": [ + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + } + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "[DEPRECATED: Use X-Project-Id header instead] Filter identities by project ID", + "deprecated": true, + "title": "Project Id" + }, + "description": "[DEPRECATED: Use X-Project-Id header instead] Filter identities by project ID", + "deprecated": true + }, + { + "name": "identifier_key", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Identifier Key" + } + }, + { + "name": "identity_type", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/IdentityType" + }, + { + "type": "null" + } + ], + "title": "Identity Type" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Identity ID cursor for pagination. Returns identities that come before this identity ID in the specified sort order", + "title": "Before" + }, + "description": "Identity ID cursor for pagination. 
Returns identities that come before this identity ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Identity ID cursor for pagination. Returns identities that come after this identity ID in the specified sort order", + "title": "After" + }, + "description": "Identity ID cursor for pagination. Returns identities that come after this identity ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of identities to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of identities to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for identities by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for identities by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Identity" + }, + "title": "Response List Identities" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["identities", "identities"], + "summary": "Create Identity", + "operationId": "create_identity", + "deprecated": true, + "parameters": [ + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The project slug to associate with the identity (cloud only).", + "title": "X-Project" + }, + "description": "The project slug to associate with the identity (cloud only)." 
+ } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IdentityCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Identity" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "put": { + "tags": ["identities", "identities"], + "summary": "Upsert Identity", + "operationId": "upsert_identity", + "deprecated": true, + "parameters": [ + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The project slug to associate with the identity (cloud only).", + "title": "X-Project" + }, + "description": "The project slug to associate with the identity (cloud only)." 
+ } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IdentityUpsert" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Identity" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/count": { + "get": { + "tags": ["identities", "identities"], + "summary": "Count Identities", + "description": "Get count of all identities for a user", + "operationId": "count_identities", + "deprecated": true, + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Identities" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/{identity_id}": { + "get": { + "tags": ["identities", "identities"], + "summary": "Retrieve Identity", + "operationId": "retrieve_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Identity" + } 
+ } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["identities", "identities"], + "summary": "Modify Identity", + "operationId": "update_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IdentityUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Identity" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["identities", "identities"], + "summary": "Delete Identity", + "description": "Delete an identity by its identifier key", + "operationId": "delete_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the 
identity in the format 'identity-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/{identity_id}/properties": { + "put": { + "tags": ["identities", "identities"], + "summary": "Upsert Properties For Identity", + "operationId": "upsert_properties_for_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/IdentityProperty" + }, + "title": "Properties" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/{identity_id}/agents": { + "get": { + "tags": ["identities"], + "summary": "List Agents For Identity", + "description": "Get all agents associated with the specified identity.", + "operationId": "list_agents_for_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + 
"minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order", + "title": "Before" + }, + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order", + "title": "After" + }, + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of agents to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of agents to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "enum": [ + "agent.blocks", + "agent.identities", + "agent.managed_group", + "agent.pending_approval", + "agent.secrets", + "agent.sources", + "agent.tags", + "agent.tools" + ], + "type": "string" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + }, + "title": "Response List Agents For Identity" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/identities/{identity_id}/blocks": { + "get": { + "tags": ["identities"], + "summary": "List Blocks For Identity", + "description": "Get all blocks associated with the specified identity.", + "operationId": "list_blocks_for_identity", + "deprecated": true, + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the identity in the format 'identity-'", + "examples": 
["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order", + "title": "Before" + }, + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order", + "title": "After" + }, + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of blocks to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of blocks to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for blocks by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for blocks by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockResponse" + }, + "title": "Response List Blocks For Identity" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_agents/count": { + "get": { + "tags": ["_internal_agents"], + "summary": "Count Agents", + "description": "Get the total number of agents for a user, with option to exclude hidden agents.", + "operationId": "count_internal_agents", + "parameters": [ + { + "name": "exclude_hidden", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, excludes hidden agents from the count. If False, includes all agents.", + "default": true, + "title": "Exclude Hidden" + }, + "description": "If True, excludes hidden agents from the count. If False, includes all agents." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Internal Agents" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_agents/{agent_id}/core-memory/blocks/{block_label}": { + "patch": { + "tags": ["_internal_agents"], + "summary": "Modify Block For Agent", + "description": "Updates a core memory block of an agent.", + "operationId": "modify_internal_core_memory_block", + "parameters": [ + { + "name": "block_label", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Block Label" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"], + "title": "Agent Id" + }, + "description": "The ID of the agent in the format 'agent-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Block" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_blocks/": { + "get": { + "tags": ["_internal_blocks"], + "summary": "List Blocks", + "operationId": "list_internal_blocks", + "parameters": [ + { + "name": "label", + "in": "query", + "required": false, 
+ "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 50, + "pattern": "^[a-zA-Z0-9_/-]+$" + }, + { + "type": "null" + } + ], + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)", + "examples": [ + "human", + "persona", + "the_label_of-a-block", + "the_label_of-a-block/with-forward-slash" + ], + "title": "Label" + }, + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)" + }, + { + "name": "templates_only", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include only templates", + "default": false, + "title": "Templates Only" + }, + "description": "Whether to include only templates" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 100, + "pattern": "^[a-zA-Z0-9 _-]+$" + }, + { + "type": "null" + } + ], + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)", + "examples": ["My Agent", "test_tool", "default-config"], + "title": "Name" + }, + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)" + }, + { + "name": "identity_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + }, + { + "type": "null" + } + ], + "description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + }, + { + "name": "identifier_keys", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Search agents by identifier keys", + "title": "Identifier 
Keys" + }, + "description": "Search agents by identifier keys" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search blocks by project id", + "title": "Project Id" + }, + "description": "Search blocks by project id" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Number of blocks to return", + "default": 50, + "title": "Limit" + }, + "description": "Number of blocks to return" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order", + "title": "Before" + }, + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order", + "title": "After" + }, + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for blocks by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for blocks by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "label_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 50, + "pattern": "^[a-zA-Z0-9_/-]+$" + }, + { + "type": "null" + } + ], + "description": "Search blocks by label. If provided, returns blocks whose label matches the search query. This is a full-text search on block labels.", + "examples": [ + "human", + "persona", + "the_label_of-a-block", + "the_label_of-a-block/with-forward-slash" + ], + "title": "Label Search" + }, + "description": "Search blocks by label. If provided, returns blocks whose label matches the search query. This is a full-text search on block labels." + }, + { + "name": "description_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + { + "type": "null" + } + ], + "description": "Search blocks by description. If provided, returns blocks whose description matches the search query. This is a full-text search on block descriptions.", + "title": "Description Search" + }, + "description": "Search blocks by description. If provided, returns blocks whose description matches the search query. This is a full-text search on block descriptions." + }, + { + "name": "value_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + { + "type": "null" + } + ], + "description": "Search blocks by value. If provided, returns blocks whose value matches the search query. This is a full-text search on block values.", + "title": "Value Search" + }, + "description": "Search blocks by value. 
If provided, returns blocks whose value matches the search query. This is a full-text search on block values." + }, + { + "name": "connected_to_agents_count_gt", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have more than this number of connected agents.", + "title": "Connected To Agents Count Gt" + }, + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have more than this number of connected agents." + }, + { + "name": "connected_to_agents_count_lt", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have less than this number of connected agents.", + "title": "Connected To Agents Count Lt" + }, + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have less than this number of connected agents." + }, + { + "name": "connected_to_agents_count_eq", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer" + } + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the exact number of connected agents. If provided, returns blocks that have exactly this number of connected agents.", + "title": "Connected To Agents Count Eq" + }, + "description": "Filter blocks by the exact number of connected agents. If provided, returns blocks that have exactly this number of connected agents." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Block" + }, + "title": "Response List Internal Blocks" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["_internal_blocks"], + "summary": "Create Block", + "operationId": "create_internal_block", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBlock" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Block" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_blocks/{block_id}": { + "delete": { + "tags": ["_internal_blocks"], + "summary": "Delete Block", + "operationId": "delete_internal_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_blocks/{block_id}/agents": { + "get": { + "tags": ["_internal_blocks"], + "summary": "List Agents For Block", + "description": "Retrieves all agents associated with the specified block.\nRaises a 404 if the block does not exist.", + "operationId": "list_agents_for_internal_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order", + "title": "Before" + }, + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order", + "title": "After" + }, + "description": "Agent ID cursor for pagination. 
Returns agents that come after this agent ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of agents to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of agents to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "include_relationships", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins. This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true, + "title": "Include Relationships" + }, + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. 
Using this can optimize performance by reducing unnecessary joins. This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + }, + "title": "Response List Agents For Internal Block" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_runs/": { + "get": { + "tags": ["_internal_runs"], + "summary": "List Runs", + "description": "List all runs.", + "operationId": "list_internal_runs", + "parameters": [ + { + "name": "run_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by a specific run ID.", + "title": "Run Id" + }, + "description": "Filter by a specific run ID." + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The unique identifier of the agent associated with the run.", + "title": "Agent Id" + }, + "description": "The unique identifier of the agent associated with the run." 
+ }, + { + "name": "agent_ids", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "The unique identifiers of the agents associated with the run. Deprecated in favor of agent_id field.", + "deprecated": true, + "title": "Agent Ids" + }, + "description": "The unique identifiers of the agents associated with the run. Deprecated in favor of agent_id field.", + "deprecated": true + }, + { + "name": "statuses", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter runs by status. Can specify multiple statuses.", + "title": "Statuses" + }, + "description": "Filter runs by status. Can specify multiple statuses." + }, + { + "name": "background", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "If True, filters for runs that were created in background mode.", + "title": "Background" + }, + "description": "If True, filters for runs that were created in background mode." + }, + { + "name": "stop_reason", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "Filter runs by stop reason.", + "title": "Stop Reason" + }, + "description": "Filter runs by stop reason." + }, + { + "name": "template_family", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter runs by template family (base_template_id).", + "title": "Template Family" + }, + "description": "Filter runs by template family (base_template_id)." 
+ }, + { + "name": "step_count", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter runs by step count. Must be provided with step_count_operator.", + "title": "Step Count" + }, + "description": "Filter runs by step count. Must be provided with step_count_operator." + }, + { + "name": "step_count_operator", + "in": "query", + "required": false, + "schema": { + "$ref": "#/components/schemas/ComparisonOperator", + "description": "Operator for step_count filter: 'eq' for equals, 'gte' for greater than or equal, 'lte' for less than or equal.", + "default": "eq" + }, + "description": "Operator for step_count filter: 'eq' for equals, 'gte' for greater than or equal, 'lte' for less than or equal." + }, + { + "name": "tools_used", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter runs that used any of the specified tools.", + "title": "Tools Used" + }, + "description": "Filter runs that used any of the specified tools." + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Run ID cursor for pagination. Returns runs that come before this run ID in the specified sort order", + "title": "Before" + }, + "description": "Run ID cursor for pagination. Returns runs that come before this run ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Run ID cursor for pagination. Returns runs that come after this run ID in the specified sort order", + "title": "After" + }, + "description": "Run ID cursor for pagination. 
Returns runs that come after this run ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer", + "maximum": 1000, + "minimum": 1 + }, + { + "type": "null" + } + ], + "description": "Maximum number of runs to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of runs to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for runs by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for runs by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "enum": ["created_at", "duration"], + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "active", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Filter for active runs.", + "default": false, + "title": "Active" + }, + "description": "Filter for active runs." + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default). Deprecated in favor of order field.", + "deprecated": true, + "default": false, + "title": "Ascending" + }, + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default). 
Deprecated in favor of order field.", + "deprecated": true + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter runs by project ID.", + "title": "Project Id" + }, + "description": "Filter runs by project ID." + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter runs by conversation ID.", + "title": "Conversation Id" + }, + "description": "Filter runs by conversation ID." + }, + { + "name": "duration_percentile", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter runs by duration percentile (1-100). Returns runs slower than this percentile.", + "title": "Duration Percentile" + }, + "description": "Filter runs by duration percentile (1-100). Returns runs slower than this percentile." + }, + { + "name": "duration_value", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Duration value in nanoseconds for filtering. Must be used with duration_operator.", + "title": "Duration Value" + }, + "description": "Duration value in nanoseconds for filtering. Must be used with duration_operator." + }, + { + "name": "duration_operator", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "enum": ["gt", "lt", "eq"], + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Comparison operator for duration filter: 'gt' (greater than), 'lt' (less than), 'eq' (equals).", + "title": "Duration Operator" + }, + "description": "Comparison operator for duration filter: 'gt' (greater than), 'lt' (less than), 'eq' (equals)." 
+ }, + { + "name": "start_date", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "description": "Filter runs created on or after this date (ISO 8601 format).", + "title": "Start Date" + }, + "description": "Filter runs created on or after this date (ISO 8601 format)." + }, + { + "name": "end_date", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "description": "Filter runs created on or before this date (ISO 8601 format).", + "title": "End Date" + }, + "description": "Filter runs created on or before this date (ISO 8601 format)." + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Run" + }, + "title": "Response List Internal Runs" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_templates/groups": { + "post": { + "tags": ["_internal_templates"], + "summary": "Create Group", + "description": "Create a new multi-agent group with the specified configuration.", + "operationId": "create_internal_template_group", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalTemplateGroupCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Group" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + 
"/v1/_internal_templates/agents": { + "post": { + "tags": ["_internal_templates"], + "summary": "Create Agent", + "description": "Create a new agent with template-related fields.", + "operationId": "create_internal_template_agent", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalTemplateAgentCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentState" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_templates/blocks": { + "post": { + "tags": ["_internal_templates"], + "summary": "Create Block", + "description": "Create a new block with template-related fields.", + "operationId": "create_internal_template_block", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalTemplateBlockCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Block" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_templates/blocks/batch": { + "post": { + "tags": ["_internal_templates"], + "summary": "Create Blocks Batch", + "description": "Create multiple blocks with template-related fields.", + "operationId": "create_internal_template_blocks_batch", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/InternalTemplateBlockCreate" + }, + "title": "Blocks" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Block" + }, + "title": "Response Create Internal Template Blocks Batch" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/_internal_templates/deployment/{deployment_id}": { + "get": { + "tags": ["_internal_templates"], + "summary": "List Deployment Entities", + "description": "List all entities (blocks, agents, groups) with the specified deployment_id.\nOptionally filter by entity types.", + "operationId": "list_deployment_entities", + "parameters": [ + { + "name": "deployment_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Deployment Id" + } + }, + { + "name": "entity_types", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by entity types (block, agent, group)", + "title": "Entity Types" + }, + "description": "Filter by entity types (block, agent, group)" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListDeploymentEntitiesResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["_internal_templates"], + "summary": "Delete Deployment", + "description": "Delete all entities (blocks, agents, groups) with the specified deployment_id.\nDeletion order: blocks -> agents -> groups to 
maintain referential integrity.", + "operationId": "delete_deployment", + "parameters": [ + { + "name": "deployment_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Deployment Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DeleteDeploymentResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/models/": { + "get": { + "tags": ["models", "llms"], + "summary": "List Llm Models", + "description": "List available LLM models using the asynchronous implementation for improved performance.\n\nReturns Model format which extends LLMConfig with additional metadata fields.\nLegacy LLMConfig fields are marked as deprecated but still available for backward compatibility.", + "operationId": "list_models", + "parameters": [ + { + "name": "provider_category", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderCategory" + } + }, + { + "type": "null" + } + ], + "title": "Provider Category" + } + }, + { + "name": "provider_name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Name" + } + }, + { + "name": "provider_type", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/ProviderType" + }, + { + "type": "null" + } + ], + "title": "Provider Type" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + }, + "title": "Response List Models" + } + } + } + }, + "422": { + "description": 
"Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/models/embedding": { + "get": { + "tags": ["models", "llms"], + "summary": "List Embedding Models", + "description": "List available embedding models using the asynchronous implementation for improved performance.\n\nReturns EmbeddingModel format which extends EmbeddingConfig with additional metadata fields.\nLegacy EmbeddingConfig fields are marked as deprecated but still available for backward compatibility.", + "operationId": "list_embedding_models", + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EmbeddingModel" + }, + "title": "Response List Embedding Models" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/": { + "post": { + "tags": ["mcp-servers"], + "summary": "Create Mcp Server", + "description": "Add a new MCP server to the Letta MCP server config", + "operationId": "mcp_create_mcp_server", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateMCPServerRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioMCPServer" + }, + { + "$ref": "#/components/schemas/SSEMCPServer" + }, + { + "$ref": "#/components/schemas/StreamableHTTPMCPServer" + } + ], + "title": "Response Mcp Create Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["mcp-servers"], + "summary": "List Mcp Servers", + "description": "Get a list of all configured MCP servers", + "operationId": "mcp_list_mcp_servers", + "parameters": [], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioMCPServer" + }, + { + "$ref": "#/components/schemas/SSEMCPServer" + }, + { + "$ref": "#/components/schemas/StreamableHTTPMCPServer" + } + ] + }, + "title": "Response Mcp List Mcp Servers" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/{mcp_server_id}": { + "get": { + "tags": ["mcp-servers"], + "summary": "Retrieve Mcp Server", + "description": "Get a specific MCP server", + "operationId": "mcp_retrieve_mcp_server", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioMCPServer" + }, + { + "$ref": "#/components/schemas/SSEMCPServer" + }, + { + "$ref": "#/components/schemas/StreamableHTTPMCPServer" + } + ], + "title": "Response Mcp Retrieve Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["mcp-servers"], + "summary": "Delete Mcp Server", + "description": "Delete an MCP server by its ID", + "operationId": "mcp_delete_mcp_server", + "parameters": [ + { + "name": "mcp_server_id", + 
"in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + } + ], + "responses": { + "204": { + "description": "Successful Response" + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["mcp-servers"], + "summary": "Update Mcp Server", + "description": "Update an existing MCP server configuration", + "operationId": "mcp_update_mcp_server", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateMCPServerRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioMCPServer" + }, + { + "$ref": "#/components/schemas/SSEMCPServer" + }, + { + "$ref": "#/components/schemas/StreamableHTTPMCPServer" + } + ], + "title": "Response Mcp Update Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/{mcp_server_id}/tools": { + "get": { + "tags": ["mcp-servers"], + "summary": "List Tools For Mcp Server", + "description": "Get a list of all tools for a specific MCP server", + "operationId": "mcp_list_tools_for_mcp_server", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/Tool" + }, + "title": "Response Mcp List Tools For Mcp Server" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/{mcp_server_id}/tools/{tool_id}": { + "get": { + "tags": ["mcp-servers"], + "summary": "Retrieve Mcp Tool", + "description": "Get a specific MCP tool by its ID", + "operationId": "mcp_retrieve_mcp_tool", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + }, + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Tool Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Tool" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/{mcp_server_id}/tools/{tool_id}/run": { + "post": { + "tags": ["mcp-servers"], + "summary": "Run Mcp Tool", + "description": "Execute a specific MCP tool\n\nThe request body should contain the tool arguments in the ToolExecuteRequest format.", + "operationId": "mcp_run_tool", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + }, + { + "name": "tool_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Tool Id" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/letta__schemas__mcp_server__ToolExecuteRequest", + "default": { + "args": {} + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful 
Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ToolExecutionResult" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/{mcp_server_id}/refresh": { + "patch": { + "tags": ["mcp-servers"], + "summary": "Refresh Mcp Server Tools", + "description": "Refresh tools for an MCP server by:\n1. Fetching current tools from the MCP server\n2. Deleting tools that no longer exist on the server\n3. Updating schemas for existing tools\n4. Adding new tools from the server\n\nReturns a summary of changes made.", + "operationId": "mcp_refresh_mcp_server_tools", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/mcp-servers/connect/{mcp_server_id}": { + "get": { + "tags": ["mcp-servers"], + "summary": "Connect Mcp Server", + "description": "Connect to an MCP server with support for OAuth via SSE.\nReturns a stream of events handling authorization state and exchange if OAuth is required.", + "operationId": "mcp_connect_mcp_server", + "parameters": [ + { + "name": "mcp_server_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Mcp Server Id" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + 
"application/json": { + "schema": {} + }, + "text/event-stream": { + "description": "Server-Sent Events stream" + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/": { + "get": { + "tags": ["blocks"], + "summary": "List Blocks", + "operationId": "list_blocks", + "parameters": [ + { + "name": "label", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 50, + "pattern": "^[a-zA-Z0-9_/-]+$" + }, + { + "type": "null" + } + ], + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)", + "examples": [ + "human", + "persona", + "the_label_of-a-block", + "the_label_of-a-block/with-forward-slash" + ], + "title": "Label" + }, + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)" + }, + { + "name": "templates_only", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to include only templates", + "default": false, + "title": "Templates Only" + }, + "description": "Whether to include only templates" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 100, + "pattern": "^[a-zA-Z0-9 _-]+$" + }, + { + "type": "null" + } + ], + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)", + "examples": ["My Agent", "test_tool", "default-config"], + "title": "Name" + }, + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)" + }, + { + "name": "identity_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^identity-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + }, + { + "type": "null" + } + ], + 
"description": "The ID of the identity in the format 'identity-'", + "examples": ["identity-123e4567-e89b-42d3-8456-426614174000"], + "title": "Identity Id" + }, + "description": "The ID of the identity in the format 'identity-'" + }, + { + "name": "identifier_keys", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Search agents by identifier keys", + "title": "Identifier Keys" + }, + "description": "Search agents by identifier keys" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search blocks by project id", + "title": "Project Id" + }, + "description": "Search blocks by project id" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "List of tags to filter blocks by", + "title": "Tags" + }, + "description": "List of tags to filter blocks by" + }, + { + "name": "match_all_tags", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, only returns blocks that match ALL given tags. Otherwise, return blocks that have ANY of the passed-in tags.", + "default": false, + "title": "Match All Tags" + }, + "description": "If True, only returns blocks that match ALL given tags. Otherwise, return blocks that have ANY of the passed-in tags." 
+ }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Number of blocks to return", + "default": 50, + "title": "Limit" + }, + "description": "Number of blocks to return" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order", + "title": "Before" + }, + "description": "Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order", + "title": "After" + }, + "description": "Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for blocks by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for blocks by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "label_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 50, + "pattern": "^[a-zA-Z0-9_/-]+$" + }, + { + "type": "null" + } + ], + "description": "Search blocks by label. If provided, returns blocks whose label matches the search query. This is a full-text search on block labels.", + "examples": [ + "human", + "persona", + "the_label_of-a-block", + "the_label_of-a-block/with-forward-slash" + ], + "title": "Label Search" + }, + "description": "Search blocks by label. If provided, returns blocks whose label matches the search query. This is a full-text search on block labels." + }, + { + "name": "description_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + { + "type": "null" + } + ], + "description": "Search blocks by description. If provided, returns blocks whose description matches the search query. This is a full-text search on block descriptions.", + "title": "Description Search" + }, + "description": "Search blocks by description. If provided, returns blocks whose description matches the search query. This is a full-text search on block descriptions." + }, + { + "name": "value_search", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + { + "type": "null" + } + ], + "description": "Search blocks by value. If provided, returns blocks whose value matches the search query. This is a full-text search on block values.", + "title": "Value Search" + }, + "description": "Search blocks by value. 
If provided, returns blocks whose value matches the search query. This is a full-text search on block values." + }, + { + "name": "connected_to_agents_count_gt", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have more than this number of connected agents.", + "title": "Connected To Agents Count Gt" + }, + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have more than this number of connected agents." + }, + { + "name": "connected_to_agents_count_lt", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have less than this number of connected agents.", + "title": "Connected To Agents Count Lt" + }, + "description": "Filter blocks by the number of connected agents. If provided, returns blocks that have less than this number of connected agents." + }, + { + "name": "connected_to_agents_count_eq", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer" + } + }, + { + "type": "null" + } + ], + "description": "Filter blocks by the exact number of connected agents. If provided, returns blocks that have exactly this number of connected agents.", + "title": "Connected To Agents Count Eq" + }, + "description": "Filter blocks by the exact number of connected agents. If provided, returns blocks that have exactly this number of connected agents." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlockResponse" + }, + "title": "Response List Blocks" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["blocks"], + "summary": "Create Block", + "operationId": "create_block", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBlock" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/count": { + "get": { + "tags": ["blocks"], + "summary": "Count Blocks", + "description": "Count all blocks with optional filtering.\nSupports the same filters as list_blocks for consistent querying.", + "operationId": "count_blocks", + "parameters": [ + { + "name": "label", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 50, + "pattern": "^[a-zA-Z0-9_/-]+$" + }, + { + "type": "null" + } + ], + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)", + "examples": [ + "human", + "persona", + "the_label_of-a-block", + "the_label_of-a-block/with-forward-slash" + ], + "title": "Label" + }, + "description": "Label to include (alphanumeric, hyphens, underscores, forward slashes)" + }, + { + "name": "templates_only", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + 
"description": "Whether to include only templates", + "default": false, + "title": "Templates Only" + }, + "description": "Whether to include only templates" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 100, + "pattern": "^[a-zA-Z0-9 _-]+$" + }, + { + "type": "null" + } + ], + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)", + "examples": ["My Agent", "test_tool", "default-config"], + "title": "Name" + }, + "description": "Name filter (alphanumeric, spaces, hyphens, underscores)" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "List of tags to filter blocks by", + "title": "Tags" + }, + "description": "List of tags to filter blocks by" + }, + { + "name": "match_all_tags", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "If True, only counts blocks that match ALL given tags. Otherwise, counts blocks that have ANY of the passed-in tags.", + "default": false, + "title": "Match All Tags" + }, + "description": "If True, only counts blocks that match ALL given tags. Otherwise, counts blocks that have ANY of the passed-in tags." 
+ }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Search blocks by project id", + "title": "Project Id" + }, + "description": "Search blocks by project id" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "integer", + "title": "Response Count Blocks" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/{block_id}": { + "patch": { + "tags": ["blocks"], + "summary": "Modify Block", + "operationId": "modify_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["blocks"], + "summary": "Delete Block", + "operationId": "delete_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + 
"minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["blocks"], + "summary": "Retrieve Block", + "operationId": "retrieve_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/{block_id}/agents": { + "get": { + "tags": ["blocks"], + "summary": "List Agents For Block", + "description": "Retrieves all agents associated with the specified block.\nRaises a 404 if the block does not exist.", + "operationId": "list_agents_for_block", + "parameters": [ + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 
42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order", + "title": "Before" + }, + "description": "Agent ID cursor for pagination. Returns agents that come before this agent ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order", + "title": "After" + }, + "description": "Agent ID cursor for pagination. Returns agents that come after this agent ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of agents to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of agents to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for agents by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "include_relationships", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true, + "title": "Include Relationships" + }, + "description": "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. If not provided, all relationships are loaded by default. Using this can optimize performance by reducing unnecessary joins.This is a legacy parameter, and no longer supported after 1.0.0 SDK versions.", + "deprecated": true + }, + { + "name": "include", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "enum": [ + "agent.blocks", + "agent.identities", + "agent.managed_group", + "agent.pending_approval", + "agent.secrets", + "agent.sources", + "agent.tags", + "agent.tools" + ], + "type": "string" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default.", + "default": [], + "title": "Include" + }, + "description": "Specify which relational fields to include in the response. No relationships are included by default." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + }, + "title": "Response List Agents For Block" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/{block_id}/identities/attach/{identity_id}": { + "patch": { + "tags": ["blocks"], + "summary": "Attach Identity To Block", + "description": "Attach an identity to a block.", + "operationId": "attach_identity_to_block", + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Identity Id" + } + }, + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/blocks/{block_id}/identities/detach/{identity_id}": { + "patch": { + "tags": ["blocks"], + "summary": "Detach Identity From Block", + "description": "Detach an identity from a block.", + "operationId": "detach_identity_from_block", + "parameters": [ + { + "name": "identity_id", + "in": "path", + "required": true, + "schema": { + 
"type": "string", + "title": "Identity Id" + } + }, + { + "name": "block_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 42, + "maxLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"], + "title": "Block Id" + }, + "description": "The ID of the block in the format 'block-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlockResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/jobs/": { + "get": { + "tags": ["jobs"], + "summary": "List Jobs", + "description": "List all jobs.", + "operationId": "list_jobs", + "parameters": [ + { + "name": "source_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `folder_id` parameter instead. Only list jobs associated with the source.", + "deprecated": true, + "title": "Source Id" + }, + "description": "Deprecated: Use `folder_id` parameter instead. Only list jobs associated with the source.", + "deprecated": true + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Job ID cursor for pagination. Returns jobs that come before this job ID in the specified sort order", + "title": "Before" + }, + "description": "Job ID cursor for pagination. 
Returns jobs that come before this job ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Job ID cursor for pagination. Returns jobs that come after this job ID in the specified sort order", + "title": "After" + }, + "description": "Job ID cursor for pagination. Returns jobs that come after this job ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of jobs to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of jobs to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for jobs by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for jobs by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "active", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Filter for active jobs.", + "default": false, + "title": "Active" + }, + "description": "Filter for active jobs." + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to sort jobs oldest to newest (True, default) or newest to oldest (False). 
Deprecated in favor of order field.", + "deprecated": true, + "default": true, + "title": "Ascending" + }, + "description": "Whether to sort jobs oldest to newest (True, default) or newest to oldest (False). Deprecated in favor of order field.", + "deprecated": true + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Job" + }, + "title": "Response List Jobs" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/jobs/active": { + "get": { + "tags": ["jobs"], + "summary": "List Active Jobs", + "description": "List all active jobs.", + "operationId": "list_active_jobs", + "deprecated": true, + "parameters": [ + { + "name": "source_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `folder_id` parameter instead. Only list jobs associated with the source.", + "deprecated": true, + "title": "Source Id" + }, + "description": "Deprecated: Use `folder_id` parameter instead. 
Only list jobs associated with the source.", + "deprecated": true + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "Before" + }, + "description": "Cursor for pagination" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "After" + }, + "description": "Cursor for pagination" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Limit for pagination", + "default": 50, + "title": "Limit" + }, + "description": "Limit for pagination" + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)", + "default": true, + "title": "Ascending" + }, + "description": "Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Job" + }, + "title": "Response List Active Jobs" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/jobs/{job_id}": { + "get": { + "tags": ["jobs"], + "summary": "Retrieve Job", + "description": "Get the status of a job.", + "operationId": "retrieve_job", + "parameters": [ + { + "name": "job_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 40, + "maxLength": 40, + "pattern": 
"^job-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the job in the format 'job-'", + "examples": ["job-123e4567-e89b-42d3-8456-426614174000"], + "title": "Job Id" + }, + "description": "The ID of the job in the format 'job-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Job" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["jobs"], + "summary": "Delete Job", + "description": "Delete a job by its job_id.", + "operationId": "delete_job", + "parameters": [ + { + "name": "job_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 40, + "maxLength": 40, + "pattern": "^job-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the job in the format 'job-'", + "examples": ["job-123e4567-e89b-42d3-8456-426614174000"], + "title": "Job Id" + }, + "description": "The ID of the job in the format 'job-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Job" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/jobs/{job_id}/cancel": { + "patch": { + "tags": ["jobs"], + "summary": "Cancel Job", + "description": "Cancel a job by its job_id.\n\nThis endpoint marks a job as cancelled, which will cause any associated\nagent execution to terminate as soon as possible.", + "operationId": "cancel_job", + "parameters": [ + { + "name": "job_id", + "in": "path", + "required": true, + "schema": { + "type": "string", 
+ "minLength": 40, + "maxLength": 40, + "pattern": "^job-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the job in the format 'job-'", + "examples": ["job-123e4567-e89b-42d3-8456-426614174000"], + "title": "Job Id" + }, + "description": "The ID of the job in the format 'job-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Job" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/health/": { + "get": { + "tags": ["health"], + "summary": "Check Health", + "description": "Async health check endpoint.", + "operationId": "check_health", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Health" + } + } + } + } + } + } + }, + "/v1/providers/": { + "get": { + "tags": ["providers"], + "summary": "List Providers", + "description": "Get a list of all custom providers.", + "operationId": "list_providers", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Provider ID cursor for pagination. Returns providers that come before this provider ID in the specified sort order", + "title": "Before" + }, + "description": "Provider ID cursor for pagination. Returns providers that come before this provider ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Provider ID cursor for pagination. 
Returns providers that come after this provider ID in the specified sort order", + "title": "After" + }, + "description": "Provider ID cursor for pagination. Returns providers that come after this provider ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of providers to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of providers to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for providers by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for providers by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter providers by name", + "title": "Name" + }, + "description": "Filter providers by name" + }, + { + "name": "provider_type", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/ProviderType" + }, + { + "type": "null" + } + ], + "description": "Filter providers by type", + "title": "Provider Type" + }, + "description": "Filter providers by type" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Provider" + }, + "title": "Response List 
Providers" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "post": { + "tags": ["providers"], + "summary": "Create Provider", + "description": "Create a new custom provider.", + "operationId": "create_provider", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProviderCreate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Provider" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/providers/{provider_id}": { + "get": { + "tags": ["providers"], + "summary": "Retrieve Provider", + "description": "Get a provider by ID.", + "operationId": "retrieve_provider", + "parameters": [ + { + "name": "provider_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^provider-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the provider in the format 'provider-'", + "examples": ["provider-123e4567-e89b-42d3-8456-426614174000"], + "title": "Provider Id" + }, + "description": "The ID of the provider in the format 'provider-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Provider" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "patch": { + "tags": ["providers"], + "summary": "Modify 
Provider", + "description": "Update an existing custom provider.", + "operationId": "modify_provider", + "parameters": [ + { + "name": "provider_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^provider-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the provider in the format 'provider-'", + "examples": ["provider-123e4567-e89b-42d3-8456-426614174000"], + "title": "Provider Id" + }, + "description": "The ID of the provider in the format 'provider-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProviderUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Provider" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["providers"], + "summary": "Delete Provider", + "description": "Delete an existing custom provider.", + "operationId": "delete_provider", + "parameters": [ + { + "name": "provider_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^provider-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the provider in the format 'provider-'", + "examples": ["provider-123e4567-e89b-42d3-8456-426614174000"], + "title": "Provider Id" + }, + "description": "The ID of the provider in the format 'provider-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { 
+ "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/providers/check": { + "post": { + "tags": ["providers"], + "summary": "Check Provider", + "description": "Verify the API key and additional parameters for a provider.", + "operationId": "check_provider", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProviderCheck" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/providers/{provider_id}/check": { + "post": { + "tags": ["providers"], + "summary": "Check Existing Provider", + "description": "Verify the API key and additional parameters for an existing provider.", + "operationId": "check_existing_provider", + "parameters": [ + { + "name": "provider_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^provider-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the provider in the format 'provider-'", + "examples": ["provider-123e4567-e89b-42d3-8456-426614174000"], + "title": "Provider Id" + }, + "description": "The ID of the provider in the format 'provider-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/providers/{provider_id}/refresh": { + "patch": { + "tags": ["providers"], + "summary": "Refresh Provider Models", + "description": "Refresh models for a BYOK 
provider by querying the provider's API.\nAdds new models and removes ones no longer available.", + "operationId": "refresh_provider_models", + "parameters": [ + { + "name": "provider_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 45, + "maxLength": 45, + "pattern": "^provider-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the provider in the format 'provider-'", + "examples": ["provider-123e4567-e89b-42d3-8456-426614174000"], + "title": "Provider Id" + }, + "description": "The ID of the provider in the format 'provider-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Provider" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/": { + "get": { + "tags": ["runs"], + "summary": "List Runs", + "description": "List all runs.", + "operationId": "list_runs", + "parameters": [ + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The unique identifier of the agent associated with the run.", + "title": "Agent Id" + }, + "description": "The unique identifier of the agent associated with the run." + }, + { + "name": "agent_ids", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "The unique identifiers of the agents associated with the run. Deprecated in favor of agent_id field.", + "deprecated": true, + "title": "Agent Ids" + }, + "description": "The unique identifiers of the agents associated with the run. 
Deprecated in favor of agent_id field.", + "deprecated": true + }, + { + "name": "statuses", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter runs by status. Can specify multiple statuses.", + "title": "Statuses" + }, + "description": "Filter runs by status. Can specify multiple statuses." + }, + { + "name": "background", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "If True, filters for runs that were created in background mode.", + "title": "Background" + }, + "description": "If True, filters for runs that were created in background mode." + }, + { + "name": "stop_reason", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "Filter runs by stop reason.", + "title": "Stop Reason" + }, + "description": "Filter runs by stop reason." + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter runs by conversation ID.", + "title": "Conversation Id" + }, + "description": "Filter runs by conversation ID." + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Run ID cursor for pagination. Returns runs that come before this run ID in the specified sort order", + "title": "Before" + }, + "description": "Run ID cursor for pagination. 
Returns runs that come before this run ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Run ID cursor for pagination. Returns runs that come after this run ID in the specified sort order", + "title": "After" + }, + "description": "Run ID cursor for pagination. Returns runs that come after this run ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer", + "maximum": 1000, + "minimum": 1 + }, + { + "type": "null" + } + ], + "description": "Maximum number of runs to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of runs to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for runs by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for runs by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "active", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Filter for active runs.", + "default": false, + "title": "Active" + }, + "description": "Filter for active runs." + }, + { + "name": "ascending", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default). 
Deprecated in favor of order field.", + "deprecated": true, + "default": false, + "title": "Ascending" + }, + "description": "Whether to sort agents oldest to newest (True) or newest to oldest (False, default). Deprecated in favor of order field.", + "deprecated": true + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Run" + }, + "title": "Response List Runs" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/active": { + "get": { + "tags": ["runs"], + "summary": "List Active Runs", + "description": "List all active runs.", + "operationId": "list_active_runs", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The unique identifier of the agent associated with the run.", + "title": "Agent Id" + }, + "description": "The unique identifier of the agent associated with the run." + }, + { + "name": "background", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "If True, filters for runs that were created in background mode.", + "title": "Background" + }, + "description": "If True, filters for runs that were created in background mode." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Run" + }, + "title": "Response List Active Runs" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}": { + "get": { + "tags": ["runs"], + "summary": "Retrieve Run", + "description": "Get the status of a run.", + "operationId": "retrieve_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Run" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "delete": { + "tags": ["runs"], + "summary": "Delete Run", + "description": "Delete a run by its run_id.", + "operationId": "delete_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/messages": { + "get": { + "tags": ["runs"], + "summary": "List Messages For Run", + "description": "Get response messages associated with a run.", + "operationId": "list_messages_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + 
"schema": { + "type": "string", + "title": "Run Id" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response List Messages For Run" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/usage": { + "get": { + "tags": ["runs"], + "summary": "Retrieve Usage For Run", + "description": "Get usage statistics for a run.", + "operationId": "retrieve_usage_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UsageStatistics" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/metrics": { + "get": { + "tags": ["runs"], + "summary": "Retrieve Metrics For Run", + "description": "Get run metrics by run ID.", + "operationId": "retrieve_metrics_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunMetrics" + } + } + } + 
}, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/steps": { + "get": { + "tags": ["runs"], + "summary": "List Steps For Run", + "description": "Get steps associated with a run with filtering options.", + "operationId": "list_steps_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "Before" + }, + "description": "Cursor for pagination" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Cursor for pagination", + "title": "After" + }, + "description": "Cursor for pagination" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for steps by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for steps by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Step" + }, + "title": "Response List Steps For Run" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/trace": { + "get": { + "tags": ["runs"], + "summary": "Retrieve Trace For Run", + "description": "Retrieve OTEL trace spans for a run.\n\nReturns a filtered set of spans relevant for observability:\n- agent_step: Individual agent reasoning steps\n- tool executions: Tool call spans\n- Root span: The top-level request span\n- time_to_first_token: TTFT measurement span\n\nRequires ClickHouse to be configured for trace storage.", + "operationId": "retrieve_trace_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "maximum": 5000, + "minimum": 1, + "description": "Maximum number of spans to return", + "default": 1000, + "title": "Limit" + }, + "description": "Maximum number of spans to return" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + }, + "title": "Response Retrieve Trace For Run" + } + } + } + }, + "422": { + "description": "Validation Error", + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/runs/{run_id}/stream": { + "post": { + "tags": ["runs"], + "summary": "Retrieve Stream For Run", + "operationId": "retrieve_stream_for_run", + "parameters": [ + { + "name": "run_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Run Id" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RetrieveStreamRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": {} + }, + "text/event-stream": { + "description": "Server-Sent Events stream", + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/LettaPing" + }, + { + "$ref": "#/components/schemas/LettaErrorMessage" + }, + { + "$ref": "#/components/schemas/LettaStopReason" + }, + { + "$ref": "#/components/schemas/LettaUsageStatistics" + } + ] + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/": { + "get": { + "tags": ["steps"], + "summary": "List Steps", + "description": "List steps with optional pagination and date filters.", + "operationId": "list_steps", + "parameters": [ + { + "name": "before", + 
"in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Return steps before this step ID", + "title": "Before" + }, + "description": "Return steps before this step ID" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Return steps after this step ID", + "title": "After" + }, + "description": "Return steps after this step ID" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of steps to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of steps to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for steps by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for steps by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "start_date", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Return steps after this ISO datetime (e.g. \"2025-01-29T15:01:19-08:00\")", + "title": "Start Date" + }, + "description": "Return steps after this ISO datetime (e.g. 
\"2025-01-29T15:01:19-08:00\")" + }, + { + "name": "end_date", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Return steps before this ISO datetime (e.g. \"2025-01-29T15:01:19-08:00\")", + "title": "End Date" + }, + "description": "Return steps before this ISO datetime (e.g. \"2025-01-29T15:01:19-08:00\")" + }, + { + "name": "model", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by the name of the model used for the step", + "title": "Model" + }, + "description": "Filter by the name of the model used for the step" + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by the ID of the agent that performed the step", + "title": "Agent Id" + }, + "description": "Filter by the ID of the agent that performed the step" + }, + { + "name": "trace_ids", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by trace ids returned by the server", + "title": "Trace Ids" + }, + "description": "Filter by trace ids returned by the server" + }, + { + "name": "feedback", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "enum": ["positive", "negative"], + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by feedback", + "title": "Feedback" + }, + "description": "Filter by feedback" + }, + { + "name": "has_feedback", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Filter by whether steps have feedback (true) or not (false)", + "title": "Has Feedback" + }, + "description": "Filter by whether steps have 
feedback (true) or not (false)" + }, + { + "name": "tags", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "null" + } + ], + "description": "Filter by tags", + "title": "Tags" + }, + "description": "Filter by tags" + }, + { + "name": "project_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by the project ID that is associated with the step (cloud only).", + "title": "Project Id" + }, + "description": "Filter by the project ID that is associated with the step (cloud only)." + }, + { + "name": "X-Project", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter by project slug to associate with the group (cloud only).", + "title": "X-Project" + }, + "description": "Filter by project slug to associate with the group (cloud only)." 
+ } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Step" + }, + "title": "Response List Steps" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/{step_id}": { + "get": { + "tags": ["steps"], + "summary": "Retrieve Step", + "description": "Get a step by ID.", + "operationId": "retrieve_step", + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^step-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the step in the format 'step-'", + "examples": ["step-123e4567-e89b-42d3-8456-426614174000"], + "title": "Step Id" + }, + "description": "The ID of the step in the format 'step-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Step" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/{step_id}/metrics": { + "get": { + "tags": ["steps"], + "summary": "Retrieve Metrics For Step", + "description": "Get step metrics by step ID.", + "operationId": "retrieve_metrics_for_step", + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^step-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the step in the format 'step-'", + "examples": ["step-123e4567-e89b-42d3-8456-426614174000"], + "title": 
"Step Id" + }, + "description": "The ID of the step in the format 'step-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/StepMetrics" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/{step_id}/trace": { + "get": { + "tags": ["steps"], + "summary": "Retrieve Trace For Step", + "operationId": "retrieve_trace_for_step", + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^step-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the step in the format 'step-'", + "examples": ["step-123e4567-e89b-42d3-8456-426614174000"], + "title": "Step Id" + }, + "description": "The ID of the step in the format 'step-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/ProviderTrace" + }, + { + "type": "null" + } + ], + "title": "Response Retrieve Trace For Step" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/{step_id}/feedback": { + "patch": { + "tags": ["steps"], + "summary": "Modify Feedback For Step", + "description": "Modify feedback for a given step.", + "operationId": "modify_feedback_for_step", + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^step-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The 
ID of the step in the format 'step-'", + "examples": ["step-123e4567-e89b-42d3-8456-426614174000"], + "title": "Step Id" + }, + "description": "The ID of the step in the format 'step-'" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModifyFeedbackRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Step" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/steps/{step_id}/messages": { + "get": { + "tags": ["steps"], + "summary": "List Messages For Step", + "description": "List messages for a given step.", + "operationId": "list_messages_for_step", + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 41, + "maxLength": 41, + "pattern": "^step-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the step in the format 'step-'", + "examples": ["step-123e4567-e89b-42d3-8456-426614174000"], + "title": "Step Id" + }, + "description": "The ID of the step in the format 'step-'" + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. 
Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Sort by field", + "default": "created_at", + "title": "Order By" + }, + "description": "Sort by field" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/SummaryMessage" + }, + { + "$ref": "#/components/schemas/EventMessage" + } + ], + "discriminator": { + "propertyName": "message_type", + "mapping": { + "system_message": "#/components/schemas/SystemMessage", + "user_message": "#/components/schemas/UserMessage", + "reasoning_message": "#/components/schemas/ReasoningMessage", + "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage", + "tool_call_message": "#/components/schemas/ToolCallMessage", + "tool_return_message": "#/components/schemas/ToolReturnMessage", + "assistant_message": "#/components/schemas/AssistantMessage", + "approval_request_message": "#/components/schemas/ApprovalRequestMessage", + "approval_response_message": "#/components/schemas/ApprovalResponseMessage", + "summary": "#/components/schemas/SummaryMessage", + "event": "#/components/schemas/EventMessage" + } + } + }, + "title": "Response List Messages For Step" + } + } + } + }, + "422": { + 
"description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/tags/": { + "get": { + "tags": ["tag", "admin", "admin"], + "summary": "List Tags", + "description": "Get the list of all tags (from agents and blocks) that have been created.", + "operationId": "list_tags", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tag cursor for pagination. Returns tags that come before this tag in the specified sort order", + "title": "Before" + }, + "description": "Tag cursor for pagination. Returns tags that come before this tag in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Tag cursor for pagination. Returns tags that come after this tag in the specified sort order", + "title": "After" + }, + "description": "Tag cursor for pagination. Returns tags that come after this tag in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of tags to return", + "default": 50, + "title": "Limit" + }, + "description": "Maximum number of tags to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for tags. 'asc' for alphabetical order, 'desc' for reverse alphabetical order", + "default": "asc", + "title": "Order" + }, + "description": "Sort order for tags. 
'asc' for alphabetical order, 'desc' for reverse alphabetical order" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "name", + "type": "string", + "description": "Field to sort by", + "default": "name", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "query_text", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter tags by text search. Deprecated, please use name field instead", + "deprecated": true, + "title": "Query Text" + }, + "description": "Filter tags by text search. Deprecated, please use name field instead", + "deprecated": true + }, + { + "name": "name", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter tags by name", + "title": "Name" + }, + "description": "Filter tags by name" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Response List Tags" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/telemetry/{step_id}": { + "get": { + "tags": ["telemetry"], + "summary": "Retrieve Provider Trace", + "description": "**DEPRECATED**: Use `GET /steps/{step_id}/trace` instead.\n\nRetrieve provider trace by step ID.", + "operationId": "retrieve_provider_trace", + "deprecated": true, + "parameters": [ + { + "name": "step_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Step Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": 
"#/components/schemas/ProviderTrace" + }, + { + "type": "null" + } + ], + "title": "Response Retrieve Provider Trace" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/": { + "get": { + "tags": ["messages"], + "summary": "List All Messages", + "description": "List messages across all agents for the current user.", + "operationId": "list_all_messages", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 
'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "conversation_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Conversation ID to filter messages by", + "title": "Conversation Id" + }, + "description": "Conversation ID to filter messages by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response List All Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/search": { + "post": { + "tags": ["messages"], + "summary": "Search All Messages", + "description": "Search messages across the organization with optional agent filtering.\nReturns messages with FTS/vector ranks and total RRF score.\n\nThis is a cloud-only feature.", + "operationId": "search_all_messages", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SearchAllMessagesRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessageListResult" + }, + { + "$ref": "#/components/schemas/UserMessageListResult" + }, + { + "$ref": "#/components/schemas/ReasoningMessageListResult" + }, + { + "$ref": "#/components/schemas/AssistantMessageListResult" + } + ], + "discriminator": { + "propertyName": "message_type", + "mapping": { + 
"system_message": "#/components/schemas/SystemMessageListResult", + "user_message": "#/components/schemas/UserMessageListResult", + "reasoning_message": "#/components/schemas/ReasoningMessageListResult", + "assistant_message": "#/components/schemas/AssistantMessageListResult" + } + } + }, + "title": "Response Search All Messages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/batches": { + "post": { + "tags": ["messages"], + "summary": "Create Batch", + "description": "Submit a batch of agent runs for asynchronous processing.\n\nCreates a job that will fan out messages to all listed agents and process them in parallel.\nThe request will be rejected if it exceeds 256MB.", + "operationId": "create_batch", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBatch", + "description": "Messages and config for all agents" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchJob" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + }, + "get": { + "tags": ["messages"], + "summary": "List Batches", + "description": "List all batch runs.", + "operationId": "list_batches", + "parameters": [ + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Job ID cursor for pagination. Returns jobs that come before this job ID in the specified sort order", + "title": "Before" + }, + "description": "Job ID cursor for pagination. 
Returns jobs that come before this job ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Job ID cursor for pagination. Returns jobs that come after this job ID in the specified sort order", + "title": "After" + }, + "description": "Job ID cursor for pagination. Returns jobs that come after this job ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of jobs to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of jobs to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for jobs by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for jobs by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BatchJob" + }, + "title": "Response List Batches" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/batches/{batch_id}": { + "get": { + "tags": ["messages"], + "summary": "Retrieve Batch", + "description": "Retrieve the status and details of a batch run.", + "operationId": "retrieve_batch", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Batch Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchJob" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/batches/{batch_id}/messages": { + "get": { + "tags": ["messages"], + "summary": "List Messages For Batch", + "description": "Get response messages for a specific batch job.", + "operationId": "list_messages_for_batch", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Batch Id" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + 
"description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order", + "title": "Before" + }, + "description": "Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order" + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order", + "title": "After" + }, + "description": "Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Maximum number of messages to return", + "default": 100, + "title": "Limit" + }, + "description": "Maximum number of messages to return" + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "enum": ["asc", "desc"], + "type": "string", + "description": "Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first", + "default": "desc", + "title": "Order" + }, + "description": "Sort order for messages by creation time. 
'asc' for oldest first, 'desc' for newest first" + }, + { + "name": "order_by", + "in": "query", + "required": false, + "schema": { + "const": "created_at", + "type": "string", + "description": "Field to sort by", + "default": "created_at", + "title": "Order By" + }, + "description": "Field to sort by" + }, + { + "name": "agent_id", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter messages by agent ID", + "title": "Agent Id" + }, + "description": "Filter messages by agent ID" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LettaBatchMessages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/batches/{batch_id}/cancel": { + "patch": { + "tags": ["messages"], + "summary": "Cancel Batch", + "description": "Cancel a batch run.", + "operationId": "cancel_batch", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Batch Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/messages/{message_id}": { + "get": { + "tags": ["messages"], + "summary": "Retrieve Message", + "description": "Retrieve a message by ID.", + "operationId": "retrieve_message", + "parameters": [ + { + "name": "message_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "minLength": 44, + "maxLength": 44, + "pattern": 
"^message-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the message in the format 'message-'", + "examples": ["message-123e4567-e89b-42d3-8456-426614174000"], + "title": "Message Id" + }, + "description": "The ID of the message in the format 'message-'" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "title": "Response Retrieve Message" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/passages/search": { + "post": { + "tags": ["passages"], + "summary": "Search Passages", + "description": "Search passages across the organization with optional agent and archive filtering.\nReturns passages with relevance scores.\n\nThis endpoint supports semantic search through passages:\n- If neither agent_id nor archive_id is provided, searches ALL passages in the organization\n- If agent_id is provided, searches passages across all archives attached to that agent\n- If archive_id is provided, searches passages within that specific archive\n- If both are provided, agent_id takes precedence", + "operationId": "search_passages", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PassageSearchRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/PassageSearchResult" + }, + "title": "Response Search Passages" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/voice-beta/{agent_id}/chat/completions": { + "post": { + "tags": ["voice"], + "summary": "Create Voice Chat Completions", + "description": "DEPRECATED: This voice-beta endpoint has been deprecated.\n\nThe voice functionality has been integrated into the main chat completions endpoint.\nPlease use the standard /v1/agents/{agent_id}/messages endpoint instead.\n\nThis endpoint will be removed in a future version.", + "operationId": "create_voice_chat_completions", + "deprecated": true, + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Agent Id" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "title": "Completion Request" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": {} + }, + "text/event-stream": {} + } + }, + "410": { + "description": "Endpoint deprecated", + "content": { + "application/json": { + "example": { + "detail": "This endpoint has been deprecated" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/embeddings/total_storage_size": { + "get": { + "tags": ["embeddings"], + "summary": "Get Embeddings Total Storage Size", + "description": "Get the total size of all embeddings in the database for a user in the storage unit given.", + "operationId": "get_total_storage_size", + "parameters": [ + { + "name": "storage-unit", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "GB", + "title": "Storage Unit" + } + } + ], + "responses": { + "200": { + "description": "Successful 
Response", + "content": { + "application/json": { + "schema": { + "type": "number", + "title": "Response Get Total Storage Size" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v1/agents/search": { + "post": { + "description": "Search deployed agents", + "summary": "Search Deployed Agents", + "tags": ["agents"], + "parameters": [], + "operationId": "agents.searchDeployedAgents", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "search": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["version"] + }, + "value": { + "type": "string" + } + }, + "required": ["field", "value"] + }, + { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["name"] + }, + "operator": { + "type": "string", + "enum": ["eq", "contains"] + }, + "value": { + "type": "string" + } + }, + "required": ["field", "operator", "value"] + }, + { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["tags"] + }, + "operator": { + "type": "string", + "enum": ["contains"] + }, + "value": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["field", "operator", "value"] + }, + { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["identity"] + }, + "operator": { + "type": "string", + "enum": ["eq"] + }, + "value": { + "type": "string" + } + }, + "required": ["field", "operator", "value"] + }, + { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["templateName"] + }, + "operator": { + "type": "string", + "enum": ["eq"] + }, + "value": { + "type": "string" + } + }, + "required": ["field", "operator", "value"] + }, + { + "type": "object", + 
"properties": { + "field": { + "type": "string", + "enum": ["agentId"] + }, + "operator": { + "type": "string", + "enum": ["eq"] + }, + "value": { + "type": "string" + } + }, + "required": ["field", "operator", "value"] + } + ] + } + }, + "project_id": { + "type": "string" + }, + "combinator": { + "type": "string", + "enum": ["AND"] + }, + "limit": { + "type": "number" + }, + "after": { + "type": "string", + "nullable": true + }, + "sortBy": { + "type": "string", + "enum": ["created_at", "last_run_completion"] + }, + "ascending": { + "type": "boolean" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentState" + } + }, + "nextCursor": { + "type": "string", + "nullable": true + } + }, + "required": ["agents"] + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/core-memory/variables": { + "get": { + "description": "Get the variables associated with an agent", + "summary": "Retrieve Memory Variables", + "tags": ["agents"], + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "agents.getAgentVariables", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "variables": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": ["variables"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "enum": ["Agent not found"] + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/models/embeddings": { + "get": { + "tags": ["models"], + "parameters": [], + "operationId": "models.listEmbeddingModels", + "responses": { + "200": 
{ + "description": "200" + } + } + } + }, + "/v1/templates/{project_id}/{template_version}/agents": { + "post": { + "description": "Creates an Agent or multiple Agents from a template", + "summary": "Create Agents From Template", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template version, formatted as {template-name}:{version-number} or {template-name}:latest. This endpoint is not available for self-hosted Letta." + } + ], + "operationId": "templates.createAgentsFromTemplate", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_ ]*$" + }, + "description": "The tags to assign to the agent" + }, + "agent_name": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_ ]*$", + "description": "The name of the agent, optional otherwise a random one will be assigned" + }, + "initial_message_sequence": { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": ["user", "system", "assistant"] + }, + "content": { + "type": "string" + }, + "name": { + "type": "string", + "nullable": true + }, + "otid": { + "type": "string", + "nullable": true + }, + "sender_id": { + "type": "string", + "nullable": true + }, + "batch_item_id": { + "type": "string", + "nullable": true + }, + "group_id": { + "type": "string", + "nullable": true + } + }, + "required": ["role", "content"] + }, + "description": "Set an initial sequence of messages, if not provided, the agent will start with the default message sequence, if an empty array is provided, the agent will start with no messages" + }, + 
"memory_variables": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "The memory variables to assign to the agent" + }, + "tool_variables": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "The tool variables to assign to the agent" + }, + "identity_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The identity ids to assign to the agent" + } + } + } + } + } + }, + "responses": { + "201": { + "description": "201" + }, + "402": { + "description": "402", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "limit": { + "type": "number" + } + }, + "required": ["message", "limit"] + } + } + } + } + } + } + }, + "/v1/templates/{template_version}/agents": { + "post": { + "description": "Creates an Agent or multiple Agents from a template", + "summary": "Create Agents From Template", + "tags": ["templates"], + "parameters": [ + { + "name": "template_version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template version, formatted as {template-name}:{version-number} or {template-name}:latest. This endpoint is not available for self-hosted Letta." 
+ } + ], + "operationId": "templates.createAgentsFromTemplateNoProject", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_ ]*$" + }, + "description": "The tags to assign to the agent" + }, + "agent_name": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_ ]*$", + "description": "The name of the agent, optional otherwise a random one will be assigned" + }, + "initial_message_sequence": { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": ["user", "system", "assistant"] + }, + "content": { + "type": "string" + }, + "name": { + "type": "string", + "nullable": true + }, + "otid": { + "type": "string", + "nullable": true + }, + "sender_id": { + "type": "string", + "nullable": true + }, + "batch_item_id": { + "type": "string", + "nullable": true + }, + "group_id": { + "type": "string", + "nullable": true + } + }, + "required": ["role", "content"] + }, + "description": "Set an initial sequence of messages, if not provided, the agent will start with the default message sequence, if an empty array is provided, the agent will start with no messages" + }, + "memory_variables": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "The memory variables to assign to the agent" + }, + "tool_variables": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "The tool variables to assign to the agent" + }, + "identity_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The identity ids to assign to the agent" + } + } + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agent_ids": { + "type": "array", + "items": { + "type": "string" + }, + 
"description": "Array of created agent IDs" + }, + "group_id": { + "type": "string", + "nullable": true, + "description": "Optional group ID if agents were created in a group" + }, + "deployment_id": { + "type": "string", + "description": "The deployment ID for the created agents" + } + }, + "required": ["agent_ids", "group_id", "deployment_id"], + "description": "Response containing created agent IDs and associated metadata" + } + } + } + }, + "402": { + "description": "402", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "limit": { + "type": "number" + } + }, + "required": ["message", "limit"] + } + } + } + } + } + } + }, + "/v1/templates": { + "get": { + "description": "List all templates", + "summary": "List templates (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + }, + { + "name": "exact", + "in": "query", + "description": "Whether to search for an exact name match", + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "version", + "in": "query", + "description": "Specify the version you want to return, otherwise will return the latest version", + "schema": { + "type": "string" + } + }, + { + "name": "template_id", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "name", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "search", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "project_slug", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "project_id", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "sort_by", + "in": "query", + "schema": { + "type": "string", + "enum": ["updated_at", "created_at"] + } + } + ], + "operationId": 
"templates.listTemplates", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "templates": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The exact name of the template" + }, + "id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "project_slug": { + "type": "string" + }, + "latest_version": { + "type": "string", + "description": "The latest version of the template" + }, + "description": { + "type": "string" + }, + "template_deployment_slug": { + "type": "string", + "description": "The full name of the template, including version and project slug" + }, + "updated_at": { + "type": "string", + "description": "When the template was last updated" + } + }, + "required": [ + "name", + "id", + "project_id", + "project_slug", + "latest_version", + "template_deployment_slug", + "updated_at" + ] + } + }, + "has_next_page": { + "type": "boolean" + } + }, + "required": ["templates", "has_next_page"] + } + } + } + } + } + }, + "post": { + "description": "Creates a new template from an existing agent or agent file", + "summary": "Create template (Cloud-only)", + "tags": ["templates"], + "parameters": [], + "operationId": "templates.createTemplateNoProject", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent"] + }, + "agent_id": { + "type": "string", + "description": "The ID of the agent to use as a template, can be from any project" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "Optional custom name for the template. If not provided, a random name will be generated." 
+ } + }, + "required": ["type", "agent_id"], + "summary": "From Agent", + "description": "Create a template from an existing agent" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent_file"] + }, + "agent_file": { + "type": "object", + "additionalProperties": { + "nullable": true + }, + "description": "The agent file to use as a template, this should be a JSON file exported from the platform" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "Optional custom name for the template. If not provided, a random name will be generated." + }, + "update_existing_tools": { + "type": "boolean", + "description": "If true, update existing custom tools source_code and json_schema (source_type cannot be changed)" + } + }, + "required": ["type", "agent_file"], + "summary": "From Agent File", + "description": "Create a template from an uploaded agent file" + } + ], + "summary": "Create template", + "description": "The type of template to create, currently only agent templates are supported" + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The exact name of the template" + }, + "id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "project_slug": { + "type": "string" + }, + "latest_version": { + "type": "string", + "description": "The latest version of the template" + }, + "description": { + "type": "string" + }, + "template_deployment_slug": { + "type": "string", + "description": "The full name of the template, including version and project slug" + }, + "updated_at": { + "type": "string", + "description": "When the template was last updated" + } + }, + "required": [ + "name", + "id", + "project_id", + "project_slug", + "latest_version", + "template_deployment_slug", + "updated_at" + ] + } + } + } + }, + "400": { + 
"description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_name}": { + "post": { + "description": "Saves the current version of the template as a new version", + "summary": "Save template version (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template version, formatted as {template-name}, any version appended will be ignored" + } + ], + "operationId": "templates.saveTemplateVersion", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "preserve_environment_variables_on_migration": { + "type": "boolean", + "description": "If true, the environment variables will be preserved in the template version when migrating agents" + }, + "preserve_core_memories_on_migration": { + "type": "boolean", + "description": "If true, the core memories will be preserved in the template version when migrating agents" + }, + "preserve_sources_on_migration": { + "type": "boolean", + "description": "If true, existing agent folders/sources will be preserved and merged with template sources during migration. If false, agent sources will be replaced with template sources." + }, + "block_reconciliation_strategy": { + "type": "string", + "enum": ["reconcile-all", "preserve-deleted"], + "description": "Strategy for reconciling memory blocks during migration: \"reconcile-all\" deletes blocks not in the template, \"preserve-deleted\" keeps them. Defaults to \"preserve-deleted\"." 
+ }, + "migrate_agents": { + "type": "boolean", + "description": "If true, existing agents attached to this template will be migrated to the new template version" + }, + "message": { + "type": "string", + "description": "A message to describe the changes made in this template version" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The exact name of the template" + }, + "id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "project_slug": { + "type": "string" + }, + "latest_version": { + "type": "string", + "description": "The latest version of the template" + }, + "description": { + "type": "string" + }, + "template_deployment_slug": { + "type": "string", + "description": "The full name of the template, including version and project slug" + }, + "updated_at": { + "type": "string", + "description": "When the template was last updated" + } + }, + "required": [ + "name", + "id", + "project_id", + "project_slug", + "latest_version", + "template_deployment_slug", + "updated_at" + ] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + }, + "delete": { + "description": "Deletes all versions of a template with the specified name", + "summary": "Delete template (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (without version)" + } + ], + "operationId": "templates.deleteTemplate", + "requestBody": { + "description": 
"Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_version}/snapshot": { + "get": { + "description": "Get a snapshot of the template version, this will return the template state at a specific version", + "summary": "Get template snapshot (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template version, formatted as {template-name}:{version-number} or {template-name}:latest" + } + ], + "operationId": "templates.getTemplateSnapshot", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agents": { + "type": "array", + "items": { + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "systemPrompt": { + "type": "string" + }, + "toolIds": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true + }, + "sourceIds": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true + }, + "memoryVariables": { + "type": "object", + "properties": { + "version": { + "type": "string" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + 
"defaultValue": { + "type": "string", + "nullable": true + }, + "type": { + "type": "string" + } + }, + "required": ["key", "type"] + } + } + }, + "required": ["version", "data"], + "nullable": true + }, + "toolVariables": { + "type": "object", + "properties": { + "version": { + "type": "string" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + "defaultValue": { + "type": "string", + "nullable": true + }, + "type": { + "type": "string" + } + }, + "required": ["key", "type"] + } + } + }, + "required": ["version", "data"], + "nullable": true + }, + "tags": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true + }, + "identityIds": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true + }, + "toolRules": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["constrain_child_tools"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "children": { + "type": "array", + "items": { + "type": "string" + } + }, + "child_arg_nodes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "args": { + "type": "object", + "additionalProperties": {}, + "nullable": true + } + }, + "required": ["name"] + }, + "nullable": true + } + }, + "required": ["tool_name", "children"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["run_first"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "args": { + "type": "object", + "additionalProperties": {}, + "nullable": true + } + }, + "required": ["tool_name"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["exit_loop"] + }, + "prompt_template": { + 
"type": "string", + "nullable": true + } + }, + "required": ["tool_name"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["conditional"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "default_child": { + "type": "string", + "nullable": true + }, + "child_output_mapping": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "require_output_mapping": { + "type": "boolean" + } + }, + "required": [ + "tool_name", + "child_output_mapping" + ] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["continue_loop"] + }, + "prompt_template": { + "type": "string", + "nullable": true + } + }, + "required": ["tool_name"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["required_before_exit"] + }, + "prompt_template": { + "type": "string", + "nullable": true + } + }, + "required": ["tool_name"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["max_count_per_step"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "max_count_limit": { + "type": "number" + } + }, + "required": ["tool_name", "max_count_limit"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["parent_last_tool"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "children": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["tool_name", "children"] + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["requires_approval"] + }, + "prompt_template": { + "type": "string", + "nullable": true + } + }, + "required": 
["tool_name"] + } + ] + }, + "nullable": true + }, + "agentType": { + "type": "string", + "enum": [ + "letta_v1_agent", + "memgpt_agent", + "memgpt_v2_agent", + "react_agent", + "workflow_agent", + "split_thread_agent", + "sleeptime_agent", + "voice_convo_agent", + "voice_sleeptime_agent" + ] + }, + "properties": { + "type": "object", + "properties": { + "enable_reasoner": { + "type": "boolean", + "nullable": true + }, + "put_inner_thoughts_in_kwargs": { + "type": "boolean", + "nullable": true + }, + "context_window_limit": { + "type": "number", + "nullable": true + }, + "max_tokens": { + "type": "number", + "nullable": true + }, + "max_reasoning_tokens": { + "type": "number", + "nullable": true + }, + "max_files_open": { + "type": "number", + "nullable": true + }, + "message_buffer_autoclear": { + "type": "boolean", + "nullable": true + }, + "verbosity_level": { + "type": "string", + "enum": ["low", "medium", "high"], + "nullable": true + }, + "reasoning_effort": { + "type": "string", + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ], + "nullable": true + }, + "per_file_view_window_char_limit": { + "type": "number", + "nullable": true + }, + "parallel_tool_calls": { + "type": "boolean", + "nullable": true + }, + "temperature": { + "type": "number", + "nullable": true + } + }, + "required": [ + "enable_reasoner", + "put_inner_thoughts_in_kwargs", + "context_window_limit", + "max_tokens", + "max_reasoning_tokens", + "max_files_open", + "message_buffer_autoclear", + "verbosity_level", + "reasoning_effort", + "per_file_view_window_char_limit", + "parallel_tool_calls", + "temperature" + ], + "nullable": true + }, + "entityId": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "required": [ + "model", + "systemPrompt", + "toolIds", + "sourceIds", + "memoryVariables", + "toolVariables", + "tags", + "identityIds", + "toolRules", + "agentType", + "properties", + "entityId", + "name" + ] + } + }, + "blocks": { + "type": 
"array", + "items": { + "type": "object", + "properties": { + "entityId": { + "type": "string" + }, + "label": { + "type": "string" + }, + "value": { + "type": "string" + }, + "limit": { + "type": "number" + }, + "description": { + "type": "string" + }, + "preserveOnMigration": { + "type": "boolean", + "nullable": true + }, + "readOnly": { + "type": "boolean" + } + }, + "required": [ + "entityId", + "label", + "value", + "limit", + "description", + "preserveOnMigration", + "readOnly" + ] + } + }, + "relationships": { + "type": "array", + "items": { + "type": "object", + "properties": { + "agentEntityId": { + "type": "string" + }, + "blockEntityId": { + "type": "string" + } + }, + "required": ["agentEntityId", "blockEntityId"] + } + }, + "configuration": { + "type": "object", + "properties": { + "managerAgentEntityId": { + "type": "string" + }, + "managerType": { + "type": "string" + }, + "terminationToken": { + "type": "string" + }, + "maxTurns": { + "type": "number" + }, + "sleeptimeAgentFrequency": { + "type": "number" + }, + "maxMessageBufferLength": { + "type": "number" + }, + "minMessageBufferLength": { + "type": "number" + } + } + }, + "type": { + "type": "string", + "enum": [ + "classic", + "cluster", + "sleeptime", + "round_robin", + "supervisor", + "dynamic", + "voice_sleeptime" + ] + }, + "version": { + "type": "string" + } + }, + "required": [ + "agents", + "blocks", + "relationships", + "configuration", + "type", + "version" + ] + } + } + } + } + } + }, + "put": { + "description": "Updates the current working version of a template from a snapshot", + "summary": "Set current template from snapshot (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name with :dev version 
(e.g., my-template:dev)" + } + ], + "operationId": "templates.setCurrentTemplateFromSnapshot", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "nullable": true, + "description": "The template snapshot to set as the current version" + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "message": { + "type": "string" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "500": { + "description": "500", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_version}/fork": { + "post": { + "description": "Forks a template version into a new template", + "summary": "Fork template (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template version, formatted as {template-name}:{version-number} or {template-name}:latest" + } + ], + "operationId": "templates.forkTemplate", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { 
+ "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "Optional custom name for the forked template. If not provided, a random name will be generated." + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The exact name of the template" + }, + "id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "project_slug": { + "type": "string" + }, + "latest_version": { + "type": "string", + "description": "The latest version of the template" + }, + "description": { + "type": "string" + }, + "template_deployment_slug": { + "type": "string", + "description": "The full name of the template, including version and project slug" + }, + "updated_at": { + "type": "string", + "description": "When the template was last updated" + } + }, + "required": [ + "name", + "id", + "project_id", + "project_slug", + "latest_version", + "template_deployment_slug", + "updated_at" + ] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}": { + "post": { + "description": "Creates a new template from an existing agent or agent file", + "summary": "Create template (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + } + ], + "operationId": "templates.createTemplate", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent"] + }, + "agent_id": { + "type": 
"string", + "description": "The ID of the agent to use as a template, can be from any project" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "Optional custom name for the template. If not provided, a random name will be generated." + } + }, + "required": ["type", "agent_id"], + "summary": "From Agent", + "description": "Create a template from an existing agent" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent_file"] + }, + "agent_file": { + "type": "object", + "additionalProperties": { + "nullable": true + }, + "description": "The agent file to use as a template, this should be a JSON file exported from the platform" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "Optional custom name for the template. If not provided, a random name will be generated." + }, + "update_existing_tools": { + "type": "boolean", + "description": "If true, update existing custom tools source_code and json_schema (source_type cannot be changed)" + } + }, + "required": ["type", "agent_file"], + "summary": "From Agent File", + "description": "Create a template from an uploaded agent file" + } + ], + "summary": "Create template", + "description": "The type of template to create, currently only agent templates are supported" + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The exact name of the template" + }, + "id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "project_slug": { + "type": "string" + }, + "latest_version": { + "type": "string", + "description": "The latest version of the template" + }, + "description": { + "type": "string" + }, + "template_deployment_slug": { + "type": "string", + "description": "The full name of the template, including version and project slug" + }, + 
"updated_at": { + "type": "string", + "description": "When the template was last updated" + } + }, + "required": [ + "name", + "id", + "project_id", + "project_slug", + "latest_version", + "template_deployment_slug", + "updated_at" + ] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{template_name}": { + "delete": { + "description": "Deletes all versions of a template with the specified name", + "summary": "Delete template (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (without version)" + } + ], + "operationId": "templates.deleteTemplateNoProject", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + }, + "patch": { + "description": "Updates the current working version of a template from an agent file", + "summary": "Update current template from agent file (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (without version)" + } + ], + "operationId": "templates.updateCurrentTemplateFromAgentFileNoProject", + "requestBody": { + 
"description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agent_file_json": { + "type": "object", + "additionalProperties": { + "nullable": true + }, + "description": "The agent file to update the current template version from" + }, + "update_existing_tools": { + "default": false, + "type": "boolean", + "description": "If true, update existing custom tools source_code and json_schema (source_type cannot be changed)" + }, + "save_existing_changes": { + "default": false, + "type": "boolean", + "description": "If true, Letta will automatically save any changes as a version before updating the template" + } + }, + "required": ["agent_file_json"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "message": { + "type": "string" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "500": { + "description": "500", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_name}/name": { + "patch": { + "description": "Renames all versions of a template with the specified name. 
Versions are automatically stripped from the current template name if accidentally included.", + "summary": "Rename template (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The current template name (version will be automatically stripped if included)" + } + ], + "operationId": "templates.renameTemplate", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "new_name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "description": "The new name for the template" + } + }, + "required": ["new_name"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "409": { + "description": "409", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_name}/description": { + "patch": { + "description": "Updates the description for all versions of a template with the specified name. 
Versions are automatically stripped from the current template name if accidentally included.", + "summary": "Update template description (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (version will be automatically stripped if included)" + } + ], + "operationId": "templates.updateTemplateDescription", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "The new description for the template" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{name}/versions": { + "get": { + "description": "List all versions of a specific template", + "summary": "List template versions (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The 
template name (without version)" + }, + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "templates.listTemplateVersions", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "versions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version number" + }, + "created_at": { + "type": "string", + "description": "When the version was created" + }, + "message": { + "type": "string", + "description": "Version description message" + }, + "is_latest": { + "type": "boolean", + "description": "Whether this is the latest version" + } + }, + "required": ["version", "created_at", "is_latest"] + } + }, + "has_next_page": { + "type": "boolean" + }, + "total_count": { + "type": "number" + } + }, + "required": ["versions", "has_next_page", "total_count"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_name}/deployments/{deployment_id}/migrate": { + "post": { + "description": "Migrates a deployment to a specific template version", + "summary": "Migrate deployment to template version (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (without version)" + }, + { + "name": "deployment_id", + "in": "path", + 
"required": true, + "schema": { + "type": "string" + }, + "description": "The deployment ID to migrate" + } + ], + "operationId": "templates.migrateDeployment", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The target template version to migrate to" + }, + "preserve_tool_variables": { + "type": "boolean", + "description": "Whether to preserve existing tool variables during migration" + }, + "preserve_core_memories": { + "type": "boolean", + "description": "Whether to preserve existing core memories during migration" + }, + "preserve_sources": { + "type": "boolean", + "description": "If true, existing agent sources will be preserved and merged with template sources during migration. If false, agent sources will be replaced with template sources." + }, + "memory_variables": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Additional memory variables to apply during migration" + } + }, + "required": ["version"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "message": { + "type": "string" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "500": { + "description": "500", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": 
["message"] + } + } + } + } + } + } + }, + "/v1/templates/{project_id}/{template_name}/agent-file": { + "put": { + "description": "Updates the current working version of a template from an agent file", + "summary": "Update current template from agent file (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "project_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The project id" + }, + { + "name": "template_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template name (without version)" + } + ], + "operationId": "templates.updateCurrentTemplateFromAgentFile", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agent_file_json": { + "type": "object", + "additionalProperties": { + "nullable": true + }, + "description": "The agent file to update the current template version from" + }, + "update_existing_tools": { + "default": false, + "type": "boolean", + "description": "If true, update existing custom tools source_code and json_schema (source_type cannot be changed)" + }, + "save_existing_changes": { + "default": false, + "type": "boolean", + "description": "If true, Letta will automatically save any changes as a version before updating the template" + } + }, + "required": ["agent_file_json"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "message": { + "type": "string" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + 
"schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "500": { + "description": "500", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/legacy-templates/{templateId}/migrates": { + "post": { + "description": "Migrates a template from a legacy project to the default project. Only works if the template is currently in a legacy project.", + "summary": "Migrate template from legacy project (Cloud-only)", + "tags": ["templates"], + "parameters": [ + { + "name": "templateId", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The template ID" + } + ], + "operationId": "templates.legacyMigration", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "message": { + "type": "string" + } + }, + "required": ["success"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/client-side-access-tokens": { + "post": { + "description": "Create a new client side access token with the specified configuration.", + "summary": "Create token (Cloud-only)", + "tags": ["clientSideAccessTokens"], + "parameters": [], + "operationId": 
"clientSideAccessTokens.createClientSideAccessToken", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "policy": { + "type": "array", + "items": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent"] + }, + "id": { + "type": "string" + }, + "access": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "read_messages", + "write_messages", + "read_agent", + "write_agent" + ] + } + } + }, + "required": ["type", "id", "access"] + } + ] + } + }, + "hostname": { + "type": "string", + "format": "uri", + "pattern": "^(http|https):\\/\\/", + "description": "The hostname of the client side application. Please specify the full URL including the protocol (http or https)." + }, + "expires_at": { + "type": "string", + "description": "The expiration date of the token. If not provided, the token will expire in 5 minutes" + } + }, + "required": ["policy", "hostname"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "policy": { + "type": "object", + "properties": { + "version": { + "type": "string", + "enum": ["1"] + }, + "data": { + "type": "array", + "items": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent"] + }, + "id": { + "type": "string" + }, + "access": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "read_messages", + "write_messages", + "read_agent", + "write_agent" + ] + } + } + }, + "required": ["type", "id", "access"] + } + ] + } + } + }, + "required": ["version", "data"] + }, + "token": { + "type": "string" + }, + "hostname": { + "type": "string" + }, + "expiresAt": { + "type": "string" + } + }, + "required": ["policy", 
"token", "hostname", "expiresAt"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + }, + "get": { + "description": "List all client side access tokens for the current account. This is only available for cloud users.", + "summary": "List tokens (Cloud-only)", + "tags": ["clientSideAccessTokens"], + "parameters": [ + { + "name": "agentId", + "in": "query", + "description": "The agent ID to filter tokens by. If provided, only tokens for this agent will be returned.", + "schema": { + "type": "string" + } + }, + { + "name": "offset", + "in": "query", + "description": "The offset for pagination. Defaults to 0.", + "schema": { + "default": 0, + "type": "number" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of tokens to return per page. Defaults to 10.", + "schema": { + "default": 10, + "type": "number" + } + } + ], + "operationId": "clientSideAccessTokens.listClientSideAccessTokens", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "tokens": { + "type": "array", + "items": { + "type": "object", + "properties": { + "policy": { + "type": "object", + "properties": { + "version": { + "type": "string", + "enum": ["1"] + }, + "data": { + "type": "array", + "items": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["agent"] + }, + "id": { + "type": "string" + }, + "access": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "read_messages", + "write_messages", + "read_agent", + "write_agent" + ] + } + } + }, + "required": ["type", "id", "access"] + } + ] + } + } + }, + "required": ["version", "data"] + }, + "token": { + "type": "string" + }, + "hostname": { 
+ "type": "string" + }, + "expiresAt": { + "type": "string" + } + }, + "required": ["policy", "token", "hostname", "expiresAt"] + } + }, + "hasNextPage": { + "type": "boolean" + } + }, + "required": ["tokens", "hasNextPage"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/client-side-access-tokens/{token}": { + "delete": { + "description": "Delete a client side access token.", + "summary": "Delete token (Cloud-only)", + "tags": ["clientSideAccessTokens"], + "parameters": [ + { + "name": "token", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The access token to delete" + } + ], + "operationId": "clientSideAccessTokens.deleteClientSideAccessToken", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": {} + } + } + }, + "responses": { + "204": { + "description": "204", + "content": { + "application/json": { + "schema": {} + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/projects": { + "get": { + "description": "List all projects", + "summary": "List Projects (Cloud-only)", + "tags": ["projects"], + "parameters": [ + { + "name": "name", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "projects.listProjects", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + 
"projects": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "slug": { + "type": "string" + }, + "id": { + "type": "string" + } + }, + "required": ["name", "slug", "id"] + } + }, + "hasNextPage": { + "type": "boolean" + } + }, + "required": ["projects", "hasNextPage"] + } + } + } + } + } + }, + "post": { + "description": "Create a new project", + "summary": "Create Project (Cloud-only)", + "tags": ["projects"], + "parameters": [], + "operationId": "projects.createProject", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 3, + "maxLength": 50 + } + }, + "required": ["name"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "slug": { + "type": "string" + }, + "id": { + "type": "string" + } + }, + "required": ["name", "slug", "id"] + } + } + } + } + } + } + }, + "/v1/projects/{projectId}": { + "delete": { + "description": "Delete a project by ID", + "summary": "Delete Project (Cloud-only)", + "tags": ["projects"], + "parameters": [ + { + "name": "projectId", + "in": "path", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "operationId": "projects.deleteProject", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "nullable": true + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + } + } + }, + "/v1/metadata/balance": { + "get": { + "description": "Retrieve the current usage balances for the organization.", + "summary": "Retrieve current organization 
balance", + "tags": ["metadata"], + "parameters": [], + "operationId": "metadata.retrieveCurrentBalances", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "total_balance": { + "type": "number" + }, + "monthly_credit_balance": { + "type": "number" + }, + "purchased_credit_balance": { + "type": "number" + }, + "billing_tier": { + "type": "string" + } + }, + "required": [ + "total_balance", + "monthly_credit_balance", + "purchased_credit_balance", + "billing_tier" + ] + } + } + } + } + } + } + }, + "/v1/metadata/feedback": { + "post": { + "description": "Send feedback from users to improve our services.", + "summary": "Send user feedback", + "tags": ["metadata"], + "parameters": [], + "operationId": "metadata.sendFeedback", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "minLength": 1, + "maxLength": 10000 + }, + "feature": { + "default": "letta-code", + "type": "string", + "enum": ["letta-code", "sdk"] + }, + "agent_id": { + "type": "string" + }, + "session_id": { + "type": "string" + }, + "version": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "settings": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + } + } + }, + "/v1/metadata/telemetry": { + "post": { + "description": "Send telemetry events for usage tracking and analysis.", + "summary": "Send telemetry event", + "tags": ["metadata"], + "parameters": [], + "operationId": "metadata.sendTelemetry", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + 
"properties": { + "service": { + "type": "string", + "enum": ["letta-code"] + }, + "events": { + "type": "array", + "items": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["session_start"] + }, + "timestamp": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "startup_command": { + "type": "string" + }, + "version": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "node_version": { + "type": "string" + } + }, + "required": ["session_id", "startup_command"] + } + }, + "required": ["type", "timestamp", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["session_end"] + }, + "timestamp": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "duration": { + "type": "number" + }, + "message_count": { + "type": "number" + }, + "tool_call_count": { + "type": "number" + }, + "exit_reason": { + "type": "string" + }, + "total_api_ms": { + "type": "number" + }, + "total_wall_ms": { + "type": "number" + }, + "prompt_tokens": { + "type": "number" + }, + "completion_tokens": { + "type": "number" + }, + "total_tokens": { + "type": "number" + }, + "cached_tokens": { + "type": "number" + }, + "reasoning_tokens": { + "type": "number" + }, + "step_count": { + "type": "number" + } + }, + "required": [ + "session_id", + "duration", + "message_count", + "tool_call_count" + ] + } + }, + "required": ["type", "timestamp", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["tool_usage"] + }, + "timestamp": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + 
"tool_name": { + "type": "string" + }, + "success": { + "type": "boolean" + }, + "duration": { + "type": "number" + }, + "response_length": { + "type": "number" + }, + "error_type": { + "type": "string" + }, + "stderr": { + "type": "string" + } + }, + "required": [ + "session_id", + "tool_name", + "success", + "duration" + ] + } + }, + "required": ["type", "timestamp", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["error"] + }, + "timestamp": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "run_id": { + "type": "string" + }, + "error_type": { + "type": "string" + }, + "error_message": { + "type": "string" + }, + "context": { + "type": "string" + }, + "http_status": { + "type": "number" + }, + "model_id": { + "type": "string" + } + }, + "required": [ + "session_id", + "error_type", + "error_message" + ] + } + }, + "required": ["type", "timestamp", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["user_input"] + }, + "timestamp": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "input_length": { + "type": "number" + }, + "is_command": { + "type": "boolean" + }, + "command_name": { + "type": "string" + }, + "message_type": { + "type": "string" + }, + "model_id": { + "type": "string" + } + }, + "required": [ + "session_id", + "input_length", + "is_command", + "message_type", + "model_id" + ] + } + }, + "required": ["type", "timestamp", "data"] + } + ] + }, + "minItems": 1 + } + }, + "required": ["service", "events"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + 
} + } + } + } + }, + "/v1/metadata/status": { + "get": { + "summary": "Gets your Letta Cloud status", + "tags": ["metadata"], + "parameters": [], + "operationId": "metadata.getStatus", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "current_project_id": { + "type": "string", + "nullable": true + } + }, + "required": ["current_project_id"] + } + } + } + } + } + } + }, + "/v1/metadata/user": { + "get": { + "description": "Retrieve information about the current authenticated user including email, name, organization, and current project.", + "summary": "Get current user information", + "tags": ["metadata"], + "parameters": [], + "operationId": "metadata.getUser", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "email": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_name": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "current_project_name": { + "type": "string", + "nullable": true + }, + "current_project_id": { + "type": "string", + "nullable": true + } + }, + "required": [ + "email", + "name", + "organization_name", + "organization_id", + "current_project_name", + "current_project_id" + ] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/schedule": { + "post": { + "description": "Schedule a message to be sent by the agent at a specified time or on a recurring basis.", + "summary": "Schedule Agent Message", + "tags": ["scheduledMessages"], + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": 
"scheduledMessages.scheduleAgentMessage", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "signature": { + "type": "string", + "nullable": true + }, + "type": { + "type": "string", + "enum": ["text"] + } + }, + "required": ["text"] + }, + { + "type": "object", + "properties": { + "source": { + "type": "object", + "properties": { + "data": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "detail": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["base64"] + } + }, + "required": ["data", "media_type"] + }, + "type": { + "type": "string", + "enum": ["image"] + } + }, + "required": ["source", "type"] + } + ] + } + }, + { + "type": "string" + } + ] + }, + "role": { + "type": "string", + "enum": ["user", "assistant", "system"] + }, + "name": { + "type": "string" + }, + "otid": { + "type": "string" + }, + "sender_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["message"] + } + }, + "required": ["content", "role"] + } + }, + "max_steps": { + "type": "number" + }, + "callback_url": { + "type": "string", + "format": "uri" + }, + "include_return_message_types": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "system_message", + "user_message", + "assistant_message", + "reasoning_message", + "hidden_reasoning_message", + "tool_call_message", + "tool_return_message", + "approval_request_message", + "approval_response_message" + ] + } + }, + "schedule": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["one-time"] + }, + "scheduled_at": { + "type": "number" + } + }, + "required": ["scheduled_at"] + }, + { + "type": 
"object", + "properties": { + "type": { + "type": "string", + "enum": ["recurring"] + }, + "cron_expression": { + "type": "string" + } + }, + "required": ["type", "cron_expression"] + } + ] + } + }, + "required": ["messages", "schedule"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "next_scheduled_at": { + "type": "string" + } + }, + "required": ["id"] + } + } + } + } + } + }, + "get": { + "description": "List all scheduled messages for a specific agent.", + "summary": "List Scheduled Agent Messages", + "tags": ["scheduledMessages"], + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "scheduledMessages.listScheduledMessages", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "scheduled_messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "message": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "signature": { + "type": "string", + "nullable": true + }, + "type": { + "type": "string", + "enum": ["text"] + } + }, + "required": ["text"] + }, + { + "type": "object", + "properties": { + "source": { + "type": "object", + "properties": { + "data": { + "type": "string" + }, + "media_type": { + "type": "string" + }, + "detail": { + "type": 
"string" + }, + "type": { + "type": "string", + "enum": ["base64"] + } + }, + "required": [ + "data", + "media_type" + ] + }, + "type": { + "type": "string", + "enum": ["image"] + } + }, + "required": ["source", "type"] + } + ] + } + }, + { + "type": "string" + } + ] + }, + "role": { + "type": "string", + "enum": ["user", "assistant", "system"] + }, + "name": { + "type": "string" + }, + "otid": { + "type": "string" + }, + "sender_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["message"] + } + }, + "required": ["content", "role"] + } + }, + "max_steps": { + "type": "number" + }, + "callback_url": { + "type": "string", + "format": "uri" + }, + "include_return_message_types": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "system_message", + "user_message", + "assistant_message", + "reasoning_message", + "hidden_reasoning_message", + "tool_call_message", + "tool_return_message", + "approval_request_message", + "approval_response_message" + ] + } + } + }, + "required": ["messages"] + }, + "schedule": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["one-time"] + }, + "scheduled_at": { + "type": "number" + } + }, + "required": ["scheduled_at"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["recurring"] + }, + "cron_expression": { + "type": "string" + } + }, + "required": ["type", "cron_expression"] + } + ] + }, + "next_scheduled_time": { + "type": "string", + "nullable": true + } + }, + "required": [ + "id", + "agent_id", + "message", + "schedule", + "next_scheduled_time" + ] + } + }, + "has_next_page": { + "type": "boolean" + } + }, + "required": ["scheduled_messages", "has_next_page"] + } + } + } + } + } + } + }, + "/v1/agents/{agent_id}/schedule/{scheduled_message_id}": { + "delete": { + "description": "Delete a scheduled message by its ID for a specific agent.", + "summary": "Delete Scheduled Agent Message", + "tags": 
["scheduledMessages"], + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "scheduled_message_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "scheduledMessages.deleteScheduledMessage", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {}, + "nullable": true + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean", + "enum": [true] + } + }, + "required": ["success"] + } + } + } + } + } + }, + "get": { + "description": "Retrieve a scheduled message by its ID for a specific agent.", + "summary": "Retrieve Scheduled Agent Message", + "tags": ["scheduledMessages"], + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "scheduled_message_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "scheduledMessages.retrieveScheduledMessage", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "message": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "signature": { + "type": "string", + "nullable": true + }, + "type": { + "type": "string", + "enum": ["text"] + } + }, + "required": ["text"] + }, + { + "type": "object", + "properties": { + "source": { + "type": "object", + "properties": { + "data": { 
+ "type": "string" + }, + "media_type": { + "type": "string" + }, + "detail": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["base64"] + } + }, + "required": ["data", "media_type"] + }, + "type": { + "type": "string", + "enum": ["image"] + } + }, + "required": ["source", "type"] + } + ] + } + }, + { + "type": "string" + } + ] + }, + "role": { + "type": "string", + "enum": ["user", "assistant", "system"] + }, + "name": { + "type": "string" + }, + "otid": { + "type": "string" + }, + "sender_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["message"] + } + }, + "required": ["content", "role"] + } + }, + "max_steps": { + "type": "number" + }, + "callback_url": { + "type": "string", + "format": "uri" + }, + "include_return_message_types": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "system_message", + "user_message", + "assistant_message", + "reasoning_message", + "hidden_reasoning_message", + "tool_call_message", + "tool_return_message", + "approval_request_message", + "approval_response_message" + ] + } + } + }, + "required": ["messages"] + }, + "schedule": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["one-time"] + }, + "scheduled_at": { + "type": "number" + } + }, + "required": ["scheduled_at"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["recurring"] + }, + "cron_expression": { + "type": "string" + } + }, + "required": ["type", "cron_expression"] + } + ] + }, + "next_scheduled_time": { + "type": "string", + "nullable": true + } + }, + "required": [ + "id", + "agent_id", + "message", + "schedule", + "next_scheduled_time" + ] + } + } + } + } + } + } + }, + "/v1/feeds": { + "post": { + "description": "Create a new feed in a project", + "summary": "Create Feed", + "tags": ["feeds"], + "parameters": [], + "operationId": "feeds.createFeed", + "requestBody": { + "description": "Body", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "project_id": { + "type": "string" + }, + "name": { + "type": "string", + "minLength": 1, + "maxLength": 100 + }, + "description": { + "type": "string", + "maxLength": 500 + } + }, + "required": ["project_id", "name"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string", + "nullable": true + }, + "project_id": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "created_by_id": { + "type": "string", + "nullable": true + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "description", + "project_id", + "organization_id", + "created_by_id", + "created_at", + "updated_at" + ] + } + } + } + } + } + }, + "get": { + "description": "List all feeds with optional filters and pagination", + "summary": "List Feeds", + "tags": ["feeds"], + "parameters": [ + { + "name": "project_id", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "name", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + } + ], + "operationId": "feeds.listFeeds", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "feeds": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string", + "nullable": true + }, + "project_id": { + "type": "string" + }, + "organization_id": { + 
"type": "string" + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "subscriptions_count": { + "type": "number" + } + }, + "required": [ + "id", + "name", + "description", + "project_id", + "organization_id", + "created_at", + "updated_at", + "subscriptions_count" + ] + } + }, + "has_next_page": { + "type": "boolean" + } + }, + "required": ["feeds", "has_next_page"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}": { + "get": { + "description": "Retrieve feed details by ID", + "summary": "Get Feed", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.getFeed", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string", + "nullable": true + }, + "project_id": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "created_by_id": { + "type": "string", + "nullable": true + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "subscriptions_count": { + "type": "number" + }, + "messages_count": { + "type": "number" + } + }, + "required": [ + "id", + "name", + "description", + "project_id", + "organization_id", + "created_by_id", + "created_at", + "updated_at", + "subscriptions_count" + ] + } + } + } + } + } + }, + "delete": { + "description": "Soft delete a feed and clean up its sequence", + "summary": "Delete Feed", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.deleteFeed", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {}, + "nullable": true + } + } + } + }, 
+ "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/messages": { + "post": { + "description": "Batch insert messages into a feed (up to 10,000 per request)", + "summary": "Publish Messages", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.publishMessages", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "type": "string" + } + }, + "required": ["content"] + }, + "minItems": 1, + "maxItems": 10000 + } + }, + "required": ["messages"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "inserted_count": { + "type": "number" + } + }, + "required": ["inserted_count"] + } + } + } + } + } + }, + "get": { + "description": "List messages from a feed (for debugging/inspection)", + "summary": "List Feed Messages", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "after_sequence", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.listMessages", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "feed_id": { + "type": 
"string" + }, + "sequence": { + "type": "number" + }, + "content_preview": { + "type": "string" + }, + "is_truncated": { + "type": "boolean" + }, + "content_size_bytes": { + "type": "number" + }, + "expires_at": { + "type": "string" + }, + "created_at": { + "type": "string" + } + }, + "required": [ + "id", + "feed_id", + "sequence", + "content_preview", + "is_truncated", + "content_size_bytes", + "expires_at", + "created_at" + ] + } + }, + "has_next_page": { + "type": "boolean" + }, + "next_cursor": { + "type": "number", + "nullable": true + } + }, + "required": ["messages", "has_next_page", "next_cursor"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/messages/{message_id}": { + "get": { + "description": "Get full content of a feed message", + "summary": "Get Feed Message", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.getMessage", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "feed_id": { + "type": "string" + }, + "sequence": { + "type": "number" + }, + "content": { + "type": "string" + }, + "content_size_bytes": { + "type": "number" + }, + "expires_at": { + "type": "string" + }, + "created_at": { + "type": "string" + } + }, + "required": [ + "id", + "feed_id", + "sequence", + "content", + "content_size_bytes", + "expires_at", + "created_at" + ] + } + }, + "required": ["message"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscribe": 
{ + "post": { + "description": "Subscribe an agent to a feed with polling configuration", + "summary": "Subscribe Agent to Feed", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.subscribeAgent", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + }, + "cron_schedule": { + "type": "string" + }, + "prompt_template": { + "type": "string" + } + }, + "required": ["agent_id", "cron_schedule"] + } + } + } + }, + "responses": { + "201": { + "description": "201", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "feed_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "agent_name": { + "type": "string", + "nullable": true + }, + "cron_schedule": { + "type": "string" + }, + "merge_strategy": { + "type": "string", + "enum": ["unique-messages", "combine-into-single-message"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "next_scheduled_at": { + "type": "string" + }, + "last_consumed_sequence": { + "type": "number" + }, + "last_consumed_at": { + "type": "string", + "nullable": true + }, + "disabled_at": { + "type": "string", + "nullable": true + }, + "created_at": { + "type": "string" + } + }, + "required": [ + "id", + "feed_id", + "agent_id", + "agent_name", + "cron_schedule", + "merge_strategy", + "prompt_template", + "next_scheduled_at", + "last_consumed_sequence", + "last_consumed_at", + "disabled_at", + "created_at" + ] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": [ + "agentAlreadySubscribed", + "agentNotInProject", + 
"invalidCronExpression" + ] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions/{subscription_id}": { + "patch": { + "description": "Update subscription configuration (cron schedule, enable/disable)", + "summary": "Update Subscription", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "subscription_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.updateSubscription", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cron_schedule": { + "type": "string" + }, + "prompt_template": { + "type": "string" + }, + "disabled": { + "type": "boolean" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "feed_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "agent_name": { + "type": "string", + "nullable": true + }, + "cron_schedule": { + "type": "string" + }, + "merge_strategy": { + "type": "string", + "enum": ["unique-messages", "combine-into-single-message"] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "next_scheduled_at": { + "type": "string" + }, + "last_consumed_sequence": { + "type": "number" + }, + "last_consumed_at": { + "type": "string", + "nullable": true + }, + "disabled_at": { + "type": "string", + "nullable": true + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + } + }, + "required": [ + "id", + "feed_id", + "agent_id", + "agent_name", + "cron_schedule", + "merge_strategy", + "prompt_template", + "next_scheduled_at", + "last_consumed_sequence", + "last_consumed_at", + "disabled_at", + "created_at", + "updated_at" + 
] + } + } + } + } + } + }, + "delete": { + "description": "Remove agent subscription from a feed (by subscription_id)", + "summary": "Delete Subscription", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "subscription_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.deleteSubscription", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {}, + "nullable": true + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/unsubscribe": { + "post": { + "description": "Remove agent subscription from a feed (by agent_id)", + "summary": "Unsubscribe Agent from Feed", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.unsubscribeAgent", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + } + }, + "required": ["agent_id"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions/{subscription_id}/trigger": { + "post": { + "description": "Immediately trigger a subscription to process pending messages", + "summary": "Trigger Subscription", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + 
"schema": { + "type": "string" + } + }, + { + "name": "subscription_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.triggerSubscription", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "messages_sent": { + "type": "number" + } + }, + "required": ["success", "messages_sent"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions/{subscription_id}/backfill": { + "post": { + "description": "Start a background job to send historical messages to an agent subscription. Returns immediately with workflow ID. 
Does not update last_consumed_sequence.", + "summary": "Backfill Subscription", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "subscription_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.backfillSubscription", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_sequence": { + "type": "number" + }, + "to_sequence": { + "type": "number" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "workflow_id": { + "type": "string" + } + }, + "required": ["workflow_id"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["feedNotFound", "subscriptionNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions/{subscription_id}/history": { + "get": { + "description": "List the run history for a subscription including scheduled runs, manual triggers, and backfills.", + "summary": "List Subscription History", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "subscription_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "page_size", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "next_page_token", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.listSubscriptionHistory", + "responses": { + "200": { + "description": "200", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "runs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "workflow_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["scheduled", "manual", "backfill"] + }, + "status": { + "type": "string", + "enum": [ + "running", + "completed", + "failed", + "canceled", + "timed_out" + ] + }, + "started_at": { + "type": "string" + }, + "completed_at": { + "type": "string", + "nullable": true + } + }, + "required": [ + "workflow_id", + "type", + "status", + "started_at", + "completed_at" + ] + } + }, + "next_page_token": { + "type": "string", + "nullable": true + } + }, + "required": ["runs", "next_page_token"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["feedNotFound", "subscriptionNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions/cron": { + "patch": { + "description": "Update the cron schedule for all subscriptions of a feed", + "summary": "Update All Subscriptions Cron Schedule", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.updateAllSubscriptionsCron", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cron_schedule": { + "type": "string" + } + }, + "required": ["cron_schedule"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "updated_count": { + "type": "number" + } + }, + "required": ["updated_count"] + } + } + } + }, + "404": { + "description": "404", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + }, + "required": ["message"] + } + } + } + } + } + } + }, + "/v1/feeds/{feed_id}/subscriptions": { + "get": { + "description": "List all agent subscriptions for a feed", + "summary": "List Feed Subscriptions", + "tags": ["feeds"], + "parameters": [ + { + "name": "feed_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + }, + { + "name": "agent_id", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "feeds.listSubscriptions", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "subscriptions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "feed_id": { + "type": "string" + }, + "agent_id": { + "type": "string" + }, + "agent_name": { + "type": "string", + "nullable": true + }, + "cron_schedule": { + "type": "string" + }, + "merge_strategy": { + "type": "string", + "enum": [ + "unique-messages", + "combine-into-single-message" + ] + }, + "prompt_template": { + "type": "string", + "nullable": true + }, + "next_scheduled_at": { + "type": "string" + }, + "last_consumed_sequence": { + "type": "number" + }, + "last_consumed_at": { + "type": "string", + "nullable": true + }, + "disabled_at": { + "type": "string", + "nullable": true + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + } + }, + "required": [ + "id", + "feed_id", + "agent_id", + "agent_name", + "cron_schedule", + "merge_strategy", + "prompt_template", + "next_scheduled_at", + "last_consumed_sequence", + "last_consumed_at", + "disabled_at", + 
"created_at", + "updated_at" + ] + } + }, + "has_next_page": { + "type": "boolean" + } + }, + "required": ["subscriptions", "has_next_page"] + } + } + } + } + } + } + }, + "/v1/pipelines": { + "post": { + "description": "Create a new pipeline (producer + feed + optionally subscribers)", + "summary": "Create Pipeline", + "tags": ["pipelines"], + "parameters": [], + "operationId": "pipelines.createPipeline", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "producer_config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "subscriber_agent_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "subscriber_cron_schedule": { + "type": "string" + }, + "prompt_template": { + "type": "string" + } + }, + "required": [ + "name", + "project_id", + "integration_type", + "producer_config" + ] + } + } + } + }, + "responses": { + "200": { + "description": "200", + 
"content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pipeline": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_id": { + "type": "string", + "nullable": true + }, + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "feed_id": { + "type": "string" + }, + "config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "next_scheduled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "last_run_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "disabled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "integration_display_name": { + "type": "string", + "nullable": true + }, + "feed_name": { + "type": "string" + }, + "subscriber_count": { + "type": "number" + }, + 
"error_count": { + "type": "number" + }, + "project_name": { + "type": "string" + }, + "project_slug": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "organization_id", + "project_id", + "integration_id", + "integration_type", + "feed_id", + "config", + "next_scheduled_at", + "last_run_at", + "disabled_at", + "created_at", + "updated_at" + ] + } + }, + "required": ["pipeline"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": [ + "integrationNotFound", + "invalidProducerConfig", + "agentNotFound" + ] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + }, + "get": { + "description": "List all pipelines for the organization with optional filtering", + "summary": "List Pipelines", + "tags": ["pipelines"], + "parameters": [ + { + "name": "search", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "integration_type", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "integration_id", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "offset", + "in": "query", + "schema": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + } + }, + { + "name": "limit", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.listPipelines", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pipelines": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_id": { + "type": "string", + "nullable": true + }, + "integration_type": { + "type": "string", + "enum": [ + 
"slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "feed_id": { + "type": "string" + }, + "config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "next_scheduled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "last_run_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "disabled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "integration_display_name": { + "type": "string", + "nullable": true + }, + "feed_name": { + "type": "string" + }, + "subscriber_count": { + "type": "number" + }, + "error_count": { + "type": "number" + }, + "project_name": { + "type": "string" + }, + "project_slug": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "organization_id", + "project_id", + "integration_id", + "integration_type", + "feed_id", + "config", + "next_scheduled_at", + "last_run_at", + "disabled_at", + "created_at", + "updated_at" + ] + } + }, + "hasNextPage": { + "type": "boolean" + } + }, + "required": 
["pipelines", "hasNextPage"] + } + } + } + } + } + } + }, + "/v1/pipelines/count": { + "get": { + "description": "Get the total count of pipelines, optionally filtered by project and search", + "summary": "Count Pipelines", + "tags": ["pipelines"], + "parameters": [ + { + "name": "search", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "integration_type", + "in": "query", + "schema": { + "type": "string" + } + }, + { + "name": "integration_id", + "in": "query", + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.countPipelines", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "count": { + "type": "number" + } + }, + "required": ["count"] + } + } + } + } + } + } + }, + "/v1/pipelines/{pipeline_id}": { + "get": { + "description": "Get a single pipeline with details", + "summary": "Get Pipeline", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.getPipeline", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pipeline": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_id": { + "type": "string", + "nullable": true + }, + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "feed_id": { + "type": "string" + }, + "config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + 
"type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "next_scheduled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "last_run_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "disabled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "integration_display_name": { + "type": "string", + "nullable": true + }, + "feed_name": { + "type": "string" + }, + "subscriber_count": { + "type": "number" + }, + "error_count": { + "type": "number" + }, + "project_name": { + "type": "string" + }, + "project_slug": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "organization_id", + "project_id", + "integration_id", + "integration_type", + "feed_id", + "config", + "next_scheduled_at", + "last_run_at", + "disabled_at", + "created_at", + "updated_at" + ] + } + }, + "required": ["pipeline"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + }, + "patch": { + "description": "Update pipeline name or disable/enable it", + 
"summary": "Update Pipeline", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.updatePipeline", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "disabled": { + "type": "boolean" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pipeline": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_id": { + "type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_id": { + "type": "string", + "nullable": true + }, + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "feed_id": { + "type": "string" + }, + "config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "next_scheduled_at": { + "type": "string", + "format": "date-time", 
+ "nullable": true + }, + "last_run_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "disabled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "integration_display_name": { + "type": "string", + "nullable": true + }, + "feed_name": { + "type": "string" + }, + "subscriber_count": { + "type": "number" + }, + "error_count": { + "type": "number" + }, + "project_name": { + "type": "string" + }, + "project_slug": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "organization_id", + "project_id", + "integration_id", + "integration_type", + "feed_id", + "config", + "next_scheduled_at", + "last_run_at", + "disabled_at", + "created_at", + "updated_at" + ] + } + }, + "required": ["pipeline"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + }, + "delete": { + "description": "Soft delete a pipeline and cascade to feed + subscriptions", + "summary": "Delete Pipeline", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.deletePipeline", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + 
"schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/pipelines/{pipeline_id}/config": { + "patch": { + "description": "Update the producer configuration for a pipeline (e.g., Slack channels)", + "summary": "Update Pipeline Producer Config", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.updatePipelineProducerConfig", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "producer_config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + } + }, + "required": ["producer_config"] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pipeline": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "organization_id": { + 
"type": "string" + }, + "project_id": { + "type": "string" + }, + "integration_id": { + "type": "string", + "nullable": true + }, + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "feed_id": { + "type": "string" + }, + "config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": ["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + }, + "next_scheduled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "last_run_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "disabled_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "integration_display_name": { + "type": "string", + "nullable": true + }, + "feed_name": { + "type": "string" + }, + "subscriber_count": { + "type": "number" + }, + "error_count": { + "type": "number" + }, + "project_name": { + "type": "string" + }, + "project_slug": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "organization_id", + "project_id", + "integration_id", + "integration_type", + 
"feed_id", + "config", + "next_scheduled_at", + "last_run_at", + "disabled_at", + "created_at", + "updated_at" + ] + } + }, + "required": ["pipeline"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["invalidProducerConfig", "configTypeMismatch"] + } + }, + "required": ["message", "errorCode"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/pipelines/preview": { + "post": { + "description": "Fetch sample messages from integration to preview what agents will receive", + "summary": "Preview Pipeline", + "tags": ["pipelines"], + "parameters": [], + "operationId": "pipelines.previewPipeline", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "integration_type": { + "type": "string", + "enum": [ + "slack", + "discord", + "microsoftTeams", + "custom_webhook" + ] + }, + "integration_id": { + "type": "string" + }, + "producer_config": { + "discriminator": { + "propertyName": "type" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["slack_channel_reader"] + }, + "data": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "channel_id": { + "type": "string" + }, + "channel_name": { + "type": "string" + }, + "last_message_ts": { + "type": "string" + } + }, + "required": ["channel_id"] + }, + "minItems": 1, + "maxItems": 100 + }, + "max_messages_per_poll": { + "type": "number" + } + }, + "required": 
["channels"] + } + }, + "required": ["type", "data"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["custom_webhook"] + }, + "data": { + "type": "object", + "properties": {} + } + }, + "required": ["type", "data"] + } + ] + } + }, + "required": [ + "integration_type", + "integration_id", + "producer_config" + ] + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "sampleMessages": { + "type": "array", + "items": { + "type": "string" + } + }, + "messageCount": { + "type": "number" + } + }, + "required": ["sampleMessages", "messageCount"] + } + } + } + }, + "400": { + "description": "400", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": [ + "integrationNotFound", + "invalidProducerConfig", + "tokenExpired" + ] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/pipelines/{pipeline_id}/sync": { + "post": { + "description": "Manually trigger a pipeline sync to fetch new messages immediately", + "summary": "Sync Pipeline", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.syncPipeline", + "requestBody": { + "description": "Body", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {} + } + } + } + }, + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "messages_ingested": { + "type": "number" + }, + "workflow_id": { + "type": "string" + } + }, + "required": ["success", "messages_ingested", "workflow_id"] + } + } + } + }, + "400": { + "description": "400", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": [ + "pipelineDisabled", + "pipelineNotSyncable", + "syncFailed" + ] + } + }, + "required": ["message", "errorCode"] + } + } + } + }, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + }, + "/v1/pipelines/{pipeline_id}/sync/history": { + "get": { + "description": "List the sync run history for a pipeline from Temporal with error details", + "summary": "List Pipeline Sync History", + "tags": ["pipelines"], + "parameters": [ + { + "name": "pipeline_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "operationId": "pipelines.listPipelineSyncHistory", + "responses": { + "200": { + "description": "200", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "runs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "workflow_id": { + "type": "string" + }, + "status": { + "type": "string", + "enum": [ + "running", + "completed", + "failed", + "canceled", + "timed_out" + ] + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "completed_at": { + "type": "string", + "format": "date-time", + "nullable": true + }, + "error": { + "type": "object", + "properties": { + "error_type": { + "type": "string" + }, + "error_message": { + "type": "string" + } + }, + "required": ["error_type", "error_message"], + "nullable": true + } + }, + "required": [ + "workflow_id", + "status", + "started_at", + "completed_at", + "error" + ] + } + }, + "next_page_token": { + "type": "string", + "nullable": true + } + }, + "required": ["runs", "next_page_token"] + } + } + } + 
}, + "404": { + "description": "404", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string", + "enum": ["pipelineNotFound"] + } + }, + "required": ["message", "errorCode"] + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "AgentEnvironmentVariable": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "pattern": "^agent-env-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Agent-env", + "examples": ["agent-env-123e4567-e89b-12d3-a456-426614174000"] + }, + "key": { + "type": "string", + "title": "Key", + "description": "The name of the environment variable." + }, + "value": { + "type": "string", + "title": "Value", + "description": "The value of the environment variable." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "An optional description of the environment variable." 
+ }, + "value_enc": { + "anyOf": [ + { + "type": "string", + "description": "Encrypted secret value (stored as encrypted string)", + "nullable": true + }, + { + "type": "null" + } + ], + "title": "Value Enc", + "description": "Encrypted value as Secret object" + }, + "agent_id": { + "type": "string", + "title": "Agent Id", + "description": "The ID of the agent this environment variable belongs to." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["key", "value", "agent_id"], + "title": "AgentEnvironmentVariable" + }, + "AgentFileAttachment": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "Unique identifier of the file-agent relationship" + }, + "file_id": { + "type": "string", + "title": "File Id", + "description": "Unique identifier of the file" + }, + "file_name": { + "type": "string", + "title": "File Name", + "description": "Name of the file" + }, + "folder_id": { + "type": "string", + "title": "Folder Id", + "description": "Unique identifier of the folder/source" + }, + "folder_name": { + "type": "string", + "title": "Folder Name", + "description": "Name of the folder/source" + }, + "is_open": { + "type": "boolean", + "title": "Is Open", + "description": "Whether the file is currently open in the agent's context" + }, + "last_accessed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Accessed At", + "description": "Timestamp of last access by the agent" + }, + "visible_content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Visible Content", + "description": "Portion of the file visible to the agent if open" + }, + "start_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Start Line", + "description": "Starting line number if file was opened with line range" + }, + "end_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + 
], + "title": "End Line", + "description": "Ending line number if file was opened with line range" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "id", + "file_id", + "file_name", + "folder_id", + "folder_name", + "is_open" + ], + "title": "AgentFileAttachment", + "description": "Response model for agent file attachments showing file status in agent context" + }, + "AgentFileSchema": { + "properties": { + "agents": { + "items": { + "$ref": "#/components/schemas/letta__schemas__agent_file__AgentSchema" + }, + "type": "array", + "title": "Agents", + "description": "List of agents in this agent file" + }, + "groups": { + "items": { + "$ref": "#/components/schemas/GroupSchema" + }, + "type": "array", + "title": "Groups", + "description": "List of groups in this agent file" + }, + "blocks": { + "items": { + "$ref": "#/components/schemas/BlockSchema" + }, + "type": "array", + "title": "Blocks", + "description": "List of memory blocks in this agent file" + }, + "files": { + "items": { + "$ref": "#/components/schemas/FileSchema" + }, + "type": "array", + "title": "Files", + "description": "List of files in this agent file" + }, + "sources": { + "items": { + "$ref": "#/components/schemas/SourceSchema" + }, + "type": "array", + "title": "Sources", + "description": "List of sources in this agent file" + }, + "tools": { + "items": { + "$ref": "#/components/schemas/letta__schemas__agent_file__ToolSchema" + }, + "type": "array", + "title": "Tools", + "description": "List of tools in this agent file" + }, + "mcp_servers": { + "items": { + "$ref": "#/components/schemas/MCPServerSchema" + }, + "type": "array", + "title": "Mcp Servers", + "description": "List of MCP servers in this agent file" + }, + "metadata": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Metadata", + "description": "Metadata for this agent file, including revision_id and other export information." 
+ }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." + } + }, + "type": "object", + "required": [ + "agents", + "groups", + "blocks", + "files", + "sources", + "tools", + "mcp_servers" + ], + "title": "AgentFileSchema", + "description": "Schema for serialized agent file that can be exported to JSON and imported into agent server." + }, + "AgentState": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "title": "Id", + "description": "The id of the agent. Assigned by the database." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the agent." 
+ }, + "tool_rules": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ChildToolRule" + }, + { + "$ref": "#/components/schemas/InitToolRule" + }, + { + "$ref": "#/components/schemas/TerminalToolRule" + }, + { + "$ref": "#/components/schemas/ConditionalToolRule" + }, + { + "$ref": "#/components/schemas/ContinueToolRule" + }, + { + "$ref": "#/components/schemas/RequiredBeforeExitToolRule" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRule" + }, + { + "$ref": "#/components/schemas/ParentToolRule" + }, + { + "$ref": "#/components/schemas/RequiresApprovalToolRule" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "conditional": "#/components/schemas/ConditionalToolRule", + "constrain_child_tools": "#/components/schemas/ChildToolRule", + "continue_loop": "#/components/schemas/ContinueToolRule", + "exit_loop": "#/components/schemas/TerminalToolRule", + "max_count_per_step": "#/components/schemas/MaxCountPerStepToolRule", + "parent_last_tool": "#/components/schemas/ParentToolRule", + "required_before_exit": "#/components/schemas/RequiredBeforeExitToolRule", + "requires_approval": "#/components/schemas/RequiresApprovalToolRule", + "run_first": "#/components/schemas/InitToolRule" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Rules", + "description": "The list of tool rules." + }, + "message_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Message Ids", + "description": "The ids of the messages in the agent's in-context memory." + }, + "system": { + "type": "string", + "title": "System", + "description": "The system prompt used by the agent." + }, + "agent_type": { + "$ref": "#/components/schemas/AgentType", + "description": "The type of agent." + }, + "llm_config": { + "$ref": "#/components/schemas/LLMConfig", + "description": "Deprecated: Use `model` field instead. 
The LLM configuration used by the agent.", + "deprecated": true + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", + "deprecated": true + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + 
"google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings used by the agent." + }, + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Output" + }, + { + "type": "null" + } + ], + "description": "The compaction settings configuration used for compaction." + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format used by the agent" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the agent." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the agent." + }, + "memory": { + "$ref": "#/components/schemas/Memory", + "description": "Deprecated: Use `blocks` field instead. 
The in-context memory of the agent.", + "deprecated": true + }, + "blocks": { + "items": { + "$ref": "#/components/schemas/Block" + }, + "type": "array", + "title": "Blocks", + "description": "The memory blocks used by the agent." + }, + "tools": { + "items": { + "$ref": "#/components/schemas/Tool" + }, + "type": "array", + "title": "Tools", + "description": "The tools used by the agent." + }, + "sources": { + "items": { + "$ref": "#/components/schemas/Source" + }, + "type": "array", + "title": "Sources", + "description": "Deprecated: Use `folders` field instead. The sources used by the agent.", + "deprecated": true + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "description": "The tags associated with the agent." + }, + "tool_exec_environment_variables": { + "items": { + "$ref": "#/components/schemas/AgentEnvironmentVariable" + }, + "type": "array", + "title": "Tool Exec Environment Variables", + "description": "Deprecated: use `secrets` field instead.", + "deprecated": true + }, + "secrets": { + "items": { + "$ref": "#/components/schemas/AgentEnvironmentVariable" + }, + "type": "array", + "title": "Secrets", + "description": "The environment variables for tool execution specific to this agent." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The id of the project the agent belongs to." + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template the agent belongs to." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the agent." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." 
+ }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "identity_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Identity Ids", + "description": "Deprecated: Use `identities` field instead. The ids of the identities associated with this agent.", + "default": [], + "deprecated": true + }, + "identities": { + "items": { + "$ref": "#/components/schemas/Identity" + }, + "type": "array", + "title": "Identities", + "description": "The identities associated with this agent.", + "default": [] + }, + "pending_approval": { + "anyOf": [ + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "type": "null" + } + ], + "description": "The latest approval request message pending for this agent, if any." + }, + "message_buffer_autoclear": { + "type": "boolean", + "title": "Message Buffer Autoclear", + "description": "If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.", + "default": false + }, + "enable_sleeptime": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Sleeptime", + "description": "If set to True, memory management will move to a background agent thread." + }, + "multi_agent_group": { + "anyOf": [ + { + "$ref": "#/components/schemas/Group" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `managed_group` field instead. 
The multi-agent group that this agent manages.", + "deprecated": true + }, + "managed_group": { + "anyOf": [ + { + "$ref": "#/components/schemas/Group" + }, + { + "type": "null" + } + ], + "description": "The multi-agent group that this agent manages" + }, + "last_run_completion": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Run Completion", + "description": "The timestamp when the agent last completed a run." + }, + "last_run_duration_ms": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Last Run Duration Ms", + "description": "The duration in milliseconds of the agent's last run." + }, + "last_stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The stop reason from the agent's last run." + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The timezone of the agent (IANA format)." + }, + "max_files_open": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Files Open", + "description": "Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "per_file_view_window_char_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Per File View Window Char Limit", + "description": "The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the agent will be hidden." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "id", + "name", + "system", + "agent_type", + "llm_config", + "memory", + "blocks", + "tools", + "sources", + "tags" + ], + "title": "AgentState", + "description": "Representation of an agent's state. This is the state of the agent at a given time, and is persisted in the DB backend. The state has all the information needed to recreate a persisted agent." + }, + "AgentType": { + "type": "string", + "enum": [ + "memgpt_agent", + "memgpt_v2_agent", + "letta_v1_agent", + "react_agent", + "workflow_agent", + "split_thread_agent", + "sleeptime_agent", + "voice_convo_agent", + "voice_sleeptime_agent" + ], + "title": "AgentType", + "description": "Enum to represent the type of agent." + }, + "Annotation": { + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "title": "Type" + }, + "url_citation": { + "$ref": "#/components/schemas/AnnotationURLCitation" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["type", "url_citation"], + "title": "Annotation", + "description": "A URL citation when using web search." + }, + "AnnotationURLCitation": { + "properties": { + "end_index": { + "type": "integer", + "title": "End Index" + }, + "start_index": { + "type": "integer", + "title": "Start Index" + }, + "title": { + "type": "string", + "title": "Title" + }, + "url": { + "type": "string", + "title": "Url" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["end_index", "start_index", "title", "url"], + "title": "AnnotationURLCitation", + "description": "A URL citation when using web search." 
+ }, + "AnthropicModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "anthropic", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "anthropic" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 1 + }, + "thinking": { + "$ref": "#/components/schemas/AnthropicThinking", + "description": "The thinking configuration for the model.", + "default": { + "type": "enabled", + "budget_tokens": 1024 + } + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + }, + "verbosity": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Verbosity", + "description": "Soft control for how verbose model output should be, used for GPT-5 models." + }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "Effort level for Opus 4.5 model (controls token conservation). 
Not setting this gives similar performance to 'high'." + }, + "strict": { + "type": "boolean", + "title": "Strict", + "description": "Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.", + "default": false + } + }, + "type": "object", + "title": "AnthropicModelSettings" + }, + "AnthropicThinking": { + "properties": { + "type": { + "type": "string", + "enum": ["enabled", "disabled"], + "title": "Type", + "description": "The type of thinking to use.", + "default": "enabled" + }, + "budget_tokens": { + "type": "integer", + "title": "Budget Tokens", + "description": "The maximum number of tokens the model can use for extended thinking.", + "default": 1024 + } + }, + "type": "object", + "title": "AnthropicThinking" + }, + "ApprovalCreate": { + "properties": { + "type": { + "type": "string", + "const": "approval", + "title": "Type", + "description": "The message type to be created.", + "default": "approval" + }, + "approvals": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ApprovalReturn" + }, + { + "$ref": "#/components/schemas/letta__schemas__letta_message__ToolReturn" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "approval": "#/components/schemas/ApprovalReturn", + "tool": "#/components/schemas/letta__schemas__letta_message__ToolReturn" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Approvals", + "description": "The list of approval responses" + }, + "approve": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Approve", + "description": "Whether the tool has been approved", + "deprecated": true + }, + "approval_request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Approval Request Id", + "description": "The message ID of the approval request", + "deprecated": true + }, + "reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], 
+ "title": "Reason", + "description": "An optional explanation for the provided approval status", + "deprecated": true + }, + "group_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Group Id", + "description": "The multi-agent group that the message was sent in" + } + }, + "type": "object", + "title": "ApprovalCreate", + "description": "Input to approve or deny a tool call request" + }, + "ApprovalRequestMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "approval_request_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "approval_request_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "tool_call": { + "anyOf": [ + { + "$ref": "#/components/schemas/ToolCall" + }, + { + "$ref": "#/components/schemas/ToolCallDelta" + } + ], + "title": "Tool Call", + "description": "The tool call that has been requested by the llm to run", + "deprecated": true + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ToolCall" + }, + "type": "array" + }, + { + "$ref": 
"#/components/schemas/ToolCallDelta" + }, + { + "type": "null" + } + ], + "title": "Tool Calls", + "description": "The tool calls that have been requested by the llm to run, which are pending approval" + } + }, + "type": "object", + "required": ["id", "date", "tool_call"], + "title": "ApprovalRequestMessage", + "description": "A message representing a request for approval to call a tool (generated by the LLM to trigger tool execution).\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n tool_call (ToolCall): The tool call" + }, + "ApprovalResponseMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "approval_response_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "approval_response_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "approvals": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ApprovalReturn" + }, + { + "$ref": 
"#/components/schemas/letta__schemas__letta_message__ToolReturn" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "approval": "#/components/schemas/ApprovalReturn", + "tool": "#/components/schemas/letta__schemas__letta_message__ToolReturn" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Approvals", + "description": "The list of approval responses" + }, + "approve": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Approve", + "description": "Whether the tool has been approved", + "deprecated": true + }, + "approval_request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Approval Request Id", + "description": "The message ID of the approval request", + "deprecated": true + }, + "reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Reason", + "description": "An optional explanation for the provided approval status", + "deprecated": true + } + }, + "type": "object", + "required": ["id", "date"], + "title": "ApprovalResponseMessage", + "description": "A message representing a response form the user indicating whether a tool has been approved to run.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n approve: (bool) Whether the tool has been approved\n approval_request_id: The ID of the approval request\n reason: (Optional[str]) An optional explanation for the provided approval status" + }, + "ApprovalReturn": { + "properties": { + "type": { + "type": "string", + "const": "approval", + "title": "Type", + "description": "The message type to be created.", + "default": "approval" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id", + "description": "The ID of the tool call that corresponds to this approval" + }, + "approve": { + "type": "boolean", + "title": 
"Approve", + "description": "Whether the tool has been approved" + }, + "reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Reason", + "description": "An optional explanation for the provided approval status" + } + }, + "type": "object", + "required": ["tool_call_id", "approve"], + "title": "ApprovalReturn" + }, + "ArchivalMemorySearchResponse": { + "properties": { + "results": { + "items": { + "$ref": "#/components/schemas/ArchivalMemorySearchResult" + }, + "type": "array", + "title": "Results", + "description": "List of search results matching the query" + }, + "count": { + "type": "integer", + "title": "Count", + "description": "Total number of results returned" + } + }, + "type": "object", + "required": ["results", "count"], + "title": "ArchivalMemorySearchResponse" + }, + "ArchivalMemorySearchResult": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "Unique identifier of the archival memory passage" + }, + "timestamp": { + "type": "string", + "title": "Timestamp", + "description": "Timestamp of when the memory was created, formatted in agent's timezone" + }, + "content": { + "type": "string", + "title": "Content", + "description": "Text content of the archival memory passage" + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "description": "List of tags associated with this memory" + } + }, + "type": "object", + "required": ["id", "timestamp", "content"], + "title": "ArchivalMemorySearchResult" + }, + "Archive": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." 
+ }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The creation date of the archive" + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the archive" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A description of the archive" + }, + "vector_db_provider": { + "$ref": "#/components/schemas/VectorDBProvider", + "description": "The vector database provider used for this archive's passages", + "default": "native" + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Embedding configuration for passages in this archive" + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Additional metadata" + }, + "id": { + "type": "string", + "pattern": "^archive-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Archive", + "examples": ["archive-123e4567-e89b-12d3-a456-426614174000"] + } + }, + "additionalProperties": false, + "type": "object", + "required": ["created_at", "name", "organization_id"], + "title": "Archive", + "description": "Representation of an archive - a collection of archival passages that can be shared between agents." + }, + "ArchiveCreateRequest": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `embedding` field instead. 
Embedding configuration for the archive", + "deprecated": true + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "Embedding model handle for the archive" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + } + }, + "type": "object", + "required": ["name"], + "title": "ArchiveCreateRequest", + "description": "Request model for creating an archive.\n\nIntentionally excludes vector_db_provider. These are derived internally (vector DB provider from env)." + }, + "ArchiveUpdateRequest": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + } + }, + "type": "object", + "title": "ArchiveUpdateRequest", + "description": "Request model for updating an archive (partial).\n\nSupports updating only name and description." 
+ }, + "AssistantMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "assistant_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "assistant_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaAssistantMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the agent (can be a string or an array of content parts)" + } + }, + "type": "object", + "required": ["id", "date", "content"], + "title": "AssistantMessage", + "description": "A message sent by the LLM in response to user input. 
Used in the LLM context.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n content (Union[str, List[LettaAssistantMessageContentUnion]]): The message content sent by the agent (can be a string or an array of content parts)" + }, + "AssistantMessageListResult": { + "properties": { + "message_type": { + "type": "string", + "const": "assistant_message", + "title": "Message Type", + "default": "assistant_message" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaAssistantMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the assistant (can be a string or an array of content parts)" + }, + "message_id": { + "type": "string", + "title": "Message Id", + "description": "The unique identifier of the message." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent that owns the message." + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The unique identifier of the conversation that the message belongs to." + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The time the message was created in ISO format." + } + }, + "type": "object", + "required": ["content", "message_id", "created_at"], + "title": "AssistantMessageListResult", + "description": "Assistant message list result with agent context.\n\nShape is identical to UpdateAssistantMessage but includes the owning agent_id and message id." 
+ }, + "Audio": { + "properties": { + "id": { + "type": "string", + "title": "Id" + } + }, + "type": "object", + "required": ["id"], + "title": "Audio", + "description": "Data about a previous audio response from the model.\n[Learn more](https://platform.openai.com/docs/guides/audio)." + }, + "AuthRequest": { + "properties": { + "password": { + "type": "string", + "title": "Password", + "description": "Admin password provided when starting the Letta server" + } + }, + "type": "object", + "title": "AuthRequest" + }, + "AuthResponse": { + "properties": { + "uuid": { + "type": "string", + "format": "uuid", + "title": "Uuid", + "description": "UUID of the user" + }, + "is_admin": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Admin", + "description": "Whether the user is an admin" + } + }, + "type": "object", + "required": ["uuid"], + "title": "AuthResponse" + }, + "AzureModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "azure", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "azure" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": 
"#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "AzureModelSettings", + "description": "Azure OpenAI model configuration (OpenAI-compatible)." + }, + "Base64Image": { + "properties": { + "type": { + "type": "string", + "const": "base64", + "title": "Type", + "description": "The source type for the image.", + "default": "base64" + }, + "media_type": { + "type": "string", + "title": "Media Type", + "description": "The media type for the image." + }, + "data": { + "type": "string", + "title": "Data", + "description": "The base64 encoded image data." + }, + "detail": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Detail", + "description": "What level of detail to use when processing and understanding the image (low, high, or auto to let the model decide)" + } + }, + "type": "object", + "required": ["media_type", "data"], + "title": "Base64Image" + }, + "BaseToolRuleSchema": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name" + }, + "type": { + "type": "string", + "title": "Type" + } + }, + "type": "object", + "required": ["tool_name", "type"], + "title": "BaseToolRuleSchema" + }, + "BatchJob": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The unix timestamp of when the job was created." 
+ }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "status": { + "$ref": "#/components/schemas/JobStatus", + "description": "The status of the job.", + "default": "created" + }, + "completed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Completed At", + "description": "The unix timestamp of when the job was completed." + }, + "stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The reason why the job was stopped." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the job." + }, + "job_type": { + "$ref": "#/components/schemas/JobType", + "default": "batch" + }, + "background": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Background", + "description": "Whether the job was created in background mode." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The agent associated with this job/run." + }, + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Url", + "description": "If set, POST to this URL when the job completes." + }, + "callback_sent_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Callback Sent At", + "description": "Timestamp when the callback was last attempted." + }, + "callback_status_code": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Callback Status Code", + "description": "HTTP status code returned by the callback endpoint." 
+ }, + "callback_error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Error", + "description": "Optional error message from attempting to POST the callback endpoint." + }, + "ttft_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Ttft Ns", + "description": "Time to first token for a run in nanoseconds" + }, + "total_duration_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Duration Ns", + "description": "Total run duration in nanoseconds" + }, + "id": { + "type": "string", + "pattern": "^(job|run)-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Job", + "examples": ["job-123e4567-e89b-12d3-a456-426614174000"] + } + }, + "additionalProperties": false, + "type": "object", + "title": "BatchJob" + }, + "BedrockModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "bedrock", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "bedrock" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": 
"#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "BedrockModelSettings", + "description": "AWS Bedrock model configuration." + }, + "Block": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." + }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "description": "Whether the block is a template (e.g. saved human/persona options).", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the block." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." 
+ }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Label", + "description": "Label of the block (e.g. 'human', 'persona') in the context window." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." + }, + "id": { + "type": "string", + "pattern": "^block-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Block", + "examples": ["block-123e4567-e89b-12d3-a456-426614174000"] + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Block." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that last updated this Block." 
+ }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the block.", + "default": [] + } + }, + "type": "object", + "required": ["value"], + "title": "Block", + "description": "A Block represents a reserved section of the LLM's context window." + }, + "BlockResponse": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." + }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "(Deprecated) The name of the block template (if it is a template).", + "deprecated": true + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "description": "Whether the block is a template (e.g. 
saved human/persona options).", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "(Deprecated) The id of the template.", + "deprecated": true + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "(Deprecated) The base template id of the block.", + "deprecated": true + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "(Deprecated) The id of the deployment.", + "deprecated": true + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "(Deprecated) The id of the entity within the template.", + "deprecated": true + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "(Deprecated) Preserve the block on template migration.", + "default": false, + "deprecated": true + }, + "label": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Label", + "description": "Label of the block (e.g. 'human', 'persona') in the context window." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "(Deprecated) Whether the agent has read-only access to the block.", + "default": false, + "deprecated": true + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "(Deprecated) If set to True, the block will be hidden.", + "deprecated": true + }, + "id": { + "type": "string", + "title": "Id", + "description": "The id of the block." + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Block." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that last updated this Block." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the block.", + "default": [] + } + }, + "type": "object", + "required": ["value", "id"], + "title": "BlockResponse" + }, + "BlockSchema": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." + }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." 
+ }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the block." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "type": "string", + "title": "Label", + "description": "Label of the block." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." 
+ }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags to associate with the block." + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this block in the file" + } + }, + "type": "object", + "required": ["value", "label", "id"], + "title": "BlockSchema", + "description": "Block with human-readable ID for agent file" + }, + "BlockUpdate": { + "properties": { + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Value", + "description": "Value of the block." + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Limit", + "description": "Character limit of the block." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "description": "Whether the block is a template (e.g. saved human/persona options).", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the block." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." 
+ }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Label", + "description": "Label of the block (e.g. 'human', 'persona') in the context window." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags to associate with the block." 
+ } + }, + "type": "object", + "title": "BlockUpdate", + "description": "Update a block" + }, + "Body_export_agent": { + "properties": { + "spec": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentFileSchema" + }, + { + "type": "null" + } + ] + }, + "legacy_spec": { + "anyOf": [ + { + "$ref": "#/components/schemas/letta__serialize_schemas__pydantic_agent_schema__AgentSchema" + }, + { + "type": "null" + } + ] + } + }, + "type": "object", + "title": "Body_export_agent" + }, + "Body_import_agent": { + "properties": { + "file": { + "type": "string", + "format": "binary", + "title": "File" + }, + "override_existing_tools": { + "type": "boolean", + "title": "Override Existing Tools", + "description": "If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.", + "default": true + }, + "strip_messages": { + "type": "boolean", + "title": "Strip Messages", + "description": "If set to True, strips all messages from the agent before importing.", + "default": false + }, + "secrets": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Secrets", + "description": "Secrets as a JSON string to pass to the agent for tool execution." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "If provided, overrides the agent name with this value." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "Embedding handle to override with." + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Model handle to override the agent's default model. This allows the imported agent to use a different model while keeping other defaults (e.g., context size) from the original configuration." 
+ }, + "append_copy_suffix": { + "type": "boolean", + "title": "Append Copy Suffix", + "description": "If set to True, appends \"_copy\" to the end of the agent name.", + "default": true, + "deprecated": true + }, + "override_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Name", + "description": "If provided, overrides the agent name with this value. Use 'name' instead.", + "deprecated": true + }, + "override_embedding_handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Embedding Handle", + "description": "Override import with specific embedding handle. Use 'embedding' instead.", + "deprecated": true + }, + "override_model_handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model Handle", + "description": "Model handle to override the agent's default model. Use 'model' instead.", + "deprecated": true + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project ID to associate the uploaded agent with. This is now passed via headers.", + "deprecated": true + }, + "env_vars_json": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Env Vars Json", + "description": "Environment variables as a JSON string to pass to the agent for tool execution. 
Use 'secrets' instead.", + "deprecated": true + } + }, + "type": "object", + "required": ["file"], + "title": "Body_import_agent" + }, + "Body_upload_file_to_folder": { + "properties": { + "file": { + "type": "string", + "format": "binary", + "title": "File" + } + }, + "type": "object", + "required": ["file"], + "title": "Body_upload_file_to_folder" + }, + "Body_upload_file_to_source": { + "properties": { + "file": { + "type": "string", + "format": "binary", + "title": "File" + } + }, + "type": "object", + "required": ["file"], + "title": "Body_upload_file_to_source" + }, + "CancelAgentRunRequest": { + "properties": { + "run_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Run Ids", + "description": "Optional list of run IDs to cancel" + } + }, + "type": "object", + "title": "CancelAgentRunRequest" + }, + "ChatCompletion": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "choices": { + "items": { + "$ref": "#/components/schemas/Choice" + }, + "type": "array", + "title": "Choices" + }, + "created": { + "type": "integer", + "title": "Created" + }, + "model": { + "type": "string", + "title": "Model" + }, + "object": { + "type": "string", + "const": "chat.completion", + "title": "Object" + }, + "service_tier": { + "anyOf": [ + { + "type": "string", + "enum": ["auto", "default", "flex", "scale", "priority"] + }, + { + "type": "null" + } + ], + "title": "Service Tier" + }, + "system_fingerprint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Fingerprint" + }, + "usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompletionUsage" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": true, + "type": "object", + "required": ["id", "choices", "created", "model", "object"], + "title": "ChatCompletion", + "description": "Represents a chat completion response returned by model, based on the provided input." 
+ }, + "ChatCompletionAssistantMessageParam": { + "properties": { + "role": { + "type": "string", + "const": "assistant", + "title": "Role" + }, + "audio": { + "anyOf": [ + { + "$ref": "#/components/schemas/Audio" + }, + { + "type": "null" + } + ] + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartRefusalParam" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Content" + }, + "function_call": { + "anyOf": [ + { + "$ref": "#/components/schemas/FunctionCall-Input" + }, + { + "type": "null" + } + ] + }, + "name": { + "type": "string", + "title": "Name" + }, + "refusal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refusal" + }, + "tool_calls": { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionMessageFunctionToolCallParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionMessageCustomToolCallParam" + } + ] + }, + "type": "array", + "title": "Tool Calls" + } + }, + "type": "object", + "required": ["role"], + "title": "ChatCompletionAssistantMessageParam", + "description": "Messages sent by the model in response to user messages." + }, + "ChatCompletionAudio": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "data": { + "type": "string", + "title": "Data" + }, + "expires_at": { + "type": "integer", + "title": "Expires At" + }, + "transcript": { + "type": "string", + "title": "Transcript" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["id", "data", "expires_at", "transcript"], + "title": "ChatCompletionAudio", + "description": "If the audio output modality is requested, this object contains data\nabout the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio)." 
+ }, + "ChatCompletionContentPartImageParam": { + "properties": { + "image_url": { + "$ref": "#/components/schemas/ImageURL" + }, + "type": { + "type": "string", + "const": "image_url", + "title": "Type" + } + }, + "type": "object", + "required": ["image_url", "type"], + "title": "ChatCompletionContentPartImageParam", + "description": "Learn about [image inputs](https://platform.openai.com/docs/guides/vision)." + }, + "ChatCompletionContentPartInputAudioParam": { + "properties": { + "input_audio": { + "$ref": "#/components/schemas/InputAudio" + }, + "type": { + "type": "string", + "const": "input_audio", + "title": "Type" + } + }, + "type": "object", + "required": ["input_audio", "type"], + "title": "ChatCompletionContentPartInputAudioParam", + "description": "Learn about [audio inputs](https://platform.openai.com/docs/guides/audio)." + }, + "ChatCompletionContentPartRefusalParam": { + "properties": { + "refusal": { + "type": "string", + "title": "Refusal" + }, + "type": { + "type": "string", + "const": "refusal", + "title": "Type" + } + }, + "type": "object", + "required": ["refusal", "type"], + "title": "ChatCompletionContentPartRefusalParam" + }, + "ChatCompletionContentPartTextParam": { + "properties": { + "text": { + "type": "string", + "title": "Text" + }, + "type": { + "type": "string", + "const": "text", + "title": "Type" + } + }, + "type": "object", + "required": ["text", "type"], + "title": "ChatCompletionContentPartTextParam", + "description": "Learn about [text inputs](https://platform.openai.com/docs/guides/text-generation)." 
+ }, + "ChatCompletionDeveloperMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "const": "developer", + "title": "Role" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["content", "role"], + "title": "ChatCompletionDeveloperMessageParam", + "description": "Developer-provided instructions that the model should follow, regardless of\nmessages sent by the user. With o1 models and newer, `developer` messages\nreplace the previous `system` messages." + }, + "ChatCompletionFunctionMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Content" + }, + "name": { + "type": "string", + "title": "Name" + }, + "role": { + "type": "string", + "const": "function", + "title": "Role" + } + }, + "type": "object", + "required": ["content", "name", "role"], + "title": "ChatCompletionFunctionMessageParam" + }, + "ChatCompletionMessage": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Content" + }, + "refusal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refusal" + }, + "role": { + "type": "string", + "const": "assistant", + "title": "Role" + }, + "annotations": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/Annotation" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Annotations" + }, + "audio": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionAudio" + }, + { + "type": "null" + } + ] + }, + "function_call": { + "anyOf": [ + { + "$ref": "#/components/schemas/FunctionCall-Output" + }, + { + "type": "null" + } + ] + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + 
"$ref": "#/components/schemas/ChatCompletionMessageFunctionToolCall-Output" + }, + { + "$ref": "#/components/schemas/ChatCompletionMessageCustomToolCall" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Calls" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["role"], + "title": "ChatCompletionMessage", + "description": "A chat completion message generated by the model." + }, + "ChatCompletionMessageCustomToolCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "custom": { + "$ref": "#/components/schemas/Custom-Output" + }, + "type": { + "type": "string", + "const": "custom", + "title": "Type" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["id", "custom", "type"], + "title": "ChatCompletionMessageCustomToolCall", + "description": "A call to a custom tool created by the model." + }, + "ChatCompletionMessageCustomToolCallParam": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "custom": { + "$ref": "#/components/schemas/Custom-Input" + }, + "type": { + "type": "string", + "const": "custom", + "title": "Type" + } + }, + "type": "object", + "required": ["id", "custom", "type"], + "title": "ChatCompletionMessageCustomToolCallParam", + "description": "A call to a custom tool created by the model." + }, + "ChatCompletionMessageFunctionToolCall-Input": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "function": { + "$ref": "#/components/schemas/openai__types__chat__chat_completion_message_function_tool_call__Function" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["id", "function", "type"], + "title": "ChatCompletionMessageFunctionToolCall", + "description": "A call to a function tool created by the model." 
+ }, + "ChatCompletionMessageFunctionToolCall-Output": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "function": { + "$ref": "#/components/schemas/Function-Output" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["id", "function", "type"], + "title": "ChatCompletionMessageFunctionToolCall", + "description": "A call to a function tool created by the model." + }, + "ChatCompletionMessageFunctionToolCallParam": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "function": { + "$ref": "#/components/schemas/openai__types__chat__chat_completion_message_function_tool_call_param__Function" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type" + } + }, + "type": "object", + "required": ["id", "function", "type"], + "title": "ChatCompletionMessageFunctionToolCallParam", + "description": "A call to a function tool created by the model." 
+ }, + "ChatCompletionRequest": { + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "ID of the model to use" + }, + "messages": { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionDeveloperMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionSystemMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionUserMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionAssistantMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionToolMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionFunctionMessageParam" + } + ] + }, + "type": "array", + "title": "Messages", + "description": "Messages comprising the conversation so far" + }, + "temperature": { + "anyOf": [ + { + "type": "number", + "maximum": 2, + "minimum": 0 + }, + { + "type": "null" + } + ], + "title": "Temperature", + "description": "Sampling temperature" + }, + "top_p": { + "anyOf": [ + { + "type": "number", + "maximum": 1, + "minimum": 0 + }, + { + "type": "null" + } + ], + "title": "Top P", + "description": "Nucleus sampling parameter" + }, + "n": { + "anyOf": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "null" + } + ], + "title": "N", + "description": "Number of chat completion choices to generate", + "default": 1 + }, + "stream": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Stream", + "description": "Whether to stream back partial progress", + "default": false + }, + "stop": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stop", + "description": "Sequences where the API will stop generating" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Maximum number of tokens to generate" + }, + "presence_penalty": { + "anyOf": [ + { + "type": 
"number", + "maximum": 2, + "minimum": -2 + }, + { + "type": "null" + } + ], + "title": "Presence Penalty", + "description": "Presence penalty" + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number", + "maximum": 2, + "minimum": -2 + }, + { + "type": "null" + } + ], + "title": "Frequency Penalty", + "description": "Frequency penalty" + }, + "user": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "User", + "description": "A unique identifier representing your end-user" + } + }, + "type": "object", + "required": ["model", "messages"], + "title": "ChatCompletionRequest", + "description": "OpenAI-compatible chat completion request - exactly matching OpenAI's schema." + }, + "ChatCompletionSystemMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "const": "system", + "title": "Role" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["content", "role"], + "title": "ChatCompletionSystemMessageParam", + "description": "Developer-provided instructions that the model should follow, regardless of\nmessages sent by the user. With o1 models and newer, use `developer` messages\nfor this purpose instead." 
+ }, + "ChatCompletionTokenLogprob": { + "properties": { + "token": { + "type": "string", + "title": "Token" + }, + "bytes": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Bytes" + }, + "logprob": { + "type": "number", + "title": "Logprob" + }, + "top_logprobs": { + "items": { + "$ref": "#/components/schemas/TopLogprob" + }, + "type": "array", + "title": "Top Logprobs" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["token", "logprob", "top_logprobs"], + "title": "ChatCompletionTokenLogprob" + }, + "ChatCompletionToolMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "const": "tool", + "title": "Role" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id" + } + }, + "type": "object", + "required": ["content", "role", "tool_call_id"], + "title": "ChatCompletionToolMessageParam" + }, + "ChatCompletionUserMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartImageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartInputAudioParam" + }, + { + "$ref": "#/components/schemas/File" + } + ] + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "const": "user", + "title": "Role" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["content", "role"], + "title": "ChatCompletionUserMessageParam", + "description": "Messages sent by an end user, containing prompts or additional context\ninformation." 
+ }, + "ChatGPTOAuthModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "chatgpt_oauth", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "chatgpt_oauth" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "reasoning": { + "$ref": "#/components/schemas/ChatGPTOAuthReasoning", + "description": "The reasoning configuration for the model.", + "default": { + "reasoning_effort": "medium" + } + } + }, + "type": "object", + "title": "ChatGPTOAuthModelSettings", + "description": "ChatGPT OAuth model configuration (uses ChatGPT backend API)." + }, + "ChatGPTOAuthReasoning": { + "properties": { + "reasoning_effort": { + "type": "string", + "enum": ["none", "low", "medium", "high", "xhigh"], + "title": "Reasoning Effort", + "description": "The reasoning effort level for GPT-5.x and o-series models.", + "default": "medium" + } + }, + "type": "object", + "title": "ChatGPTOAuthReasoning", + "description": "Reasoning configuration for ChatGPT OAuth models (GPT-5.x, o-series)." + }, + "ChildToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "constrain_child_tools", + "title": "Type", + "default": "constrain_child_tools" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." 
+ }, + "children": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Children", + "description": "The children tools that can be invoked." + }, + "child_arg_nodes": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ToolCallNode" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Child Arg Nodes", + "description": "Optional list of typed child argument overrides. Each node must reference a child in 'children'." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name", "children"], + "title": "ChildToolRule", + "description": "A ToolRule represents a tool that can be invoked by the agent." + }, + "ChildToolRuleSchema": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name" + }, + "type": { + "type": "string", + "title": "Type" + }, + "children": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Children" + } + }, + "type": "object", + "required": ["tool_name", "type", "children"], + "title": "ChildToolRuleSchema" + }, + "Choice": { + "properties": { + "finish_reason": { + "type": "string", + "enum": [ + "stop", + "length", + "tool_calls", + "content_filter", + "function_call" + ], + "title": "Finish Reason" + }, + "index": { + "type": "integer", + "title": "Index" + }, + "logprobs": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChoiceLogprobs" + }, + { + "type": "null" + } + ] + }, + "message": { + "$ref": "#/components/schemas/ChatCompletionMessage" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["finish_reason", "index", "message"], + "title": "Choice" + }, + "ChoiceLogprobs": { + "properties": { + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionTokenLogprob" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Content" + }, + "refusal": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionTokenLogprob" + }, + "type": 
"array" + }, + { + "type": "null" + } + ], + "title": "Refusal" + } + }, + "additionalProperties": true, + "type": "object", + "title": "ChoiceLogprobs", + "description": "Log probability information for the choice." + }, + "ClientToolSchema": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the tool function" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of what the tool does" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters", + "description": "JSON Schema for the function parameters" + } + }, + "type": "object", + "required": ["name"], + "title": "ClientToolSchema", + "description": "Schema for a client-side tool passed in the request.\n\nClient-side tools are executed by the client, not the server. When the agent\ncalls a client-side tool, execution pauses and returns control to the client\nto execute the tool and provide the result." + }, + "CodeInput": { + "properties": { + "code": { + "type": "string", + "title": "Code", + "description": "Source code to parse for JSON schema" + }, + "source_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Type", + "description": "The source type of the code (python or typescript)", + "default": "python" + } + }, + "type": "object", + "required": ["code"], + "title": "CodeInput" + }, + "CompactionRequest": { + "properties": { + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used." 
+ } + }, + "type": "object", + "title": "CompactionRequest" + }, + "CompactionResponse": { + "properties": { + "summary": { + "type": "string", + "title": "Summary" + }, + "num_messages_before": { + "type": "integer", + "title": "Num Messages Before" + }, + "num_messages_after": { + "type": "integer", + "title": "Num Messages After" + } + }, + "type": "object", + "required": ["summary", "num_messages_before", "num_messages_after"], + "title": "CompactionResponse" + }, + "CompactionSettings-Input": { + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "Model handle to use for summarization (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + 
"google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "Optional model settings used to override defaults for the summarizer model." + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "The prompt to use for summarization.", + "default": "The following messages are being evicted from your context window. Write a detailed summary that captures what happened in these messages.\n\nThis summary will appear BEFORE the remaining recent messages in context, providing background for what comes after. Include:\n\n1. **What happened**: The conversations, tasks, and exchanges that took place. What did the user ask for? What did you do? How did things progress?\n\n2. **High level goals**: If there is an existing summary in the transcript, make sure to take it into consideration to continue tracking the higher level goals and long-term progress. Make sure to not lose track of higher level goals or the ongoing task.\n\n3. **Important details**: Specific names, data, configurations, or facts that were discussed. Don't omit details that might be referenced later.\n\n4. **Lookup hints**: For any detailed content (long lists, extensive data, specific conversations) that couldn't fit in the summary, note the topic and key terms that could be used to find it in message history later.\n\nWrite in first person as a factual record of what occurred. Be thorough and detailed - the goal is to preserve enough context that the recent messages make sense and important information isn't lost.\n\nKeep your summary under 250 words. 
Only output the summary." + }, + "prompt_acknowledgement": { + "type": "boolean", + "title": "Prompt Acknowledgement", + "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).", + "default": false + }, + "clip_chars": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Clip Chars", + "description": "The maximum length of the summary in characters. If none, no clipping is performed.", + "default": 50000 + }, + "mode": { + "type": "string", + "enum": ["all", "sliding_window"], + "title": "Mode", + "description": "The type of summarization technique to use.", + "default": "sliding_window" + }, + "sliding_window_percentage": { + "type": "number", + "title": "Sliding Window Percentage", + "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)." + } + }, + "type": "object", + "required": ["model"], + "title": "CompactionSettings", + "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." + }, + "CompactionSettings-Output": { + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "Model handle to use for summarization (format: provider/model-name)."
+ }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "Optional model settings used to override defaults for the summarizer model." 
+ }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "The prompt to use for summarization.", + "default": "The following messages are being evicted from your context window. Write a detailed summary that captures what happened in these messages.\n\nThis summary will appear BEFORE the remaining recent messages in context, providing background for what comes after. Include:\n\n1. **What happened**: The conversations, tasks, and exchanges that took place. What did the user ask for? What did you do? How did things progress?\n\n2. **High level goals**: If there is an existing summary in the transcript, make sure to take it into consideration to continue tracking the higher level goals and long-term progress. Make sure to not lose track of higher level goals or the ongoing task.\n\n3. **Important details**: Specific names, data, configurations, or facts that were discussed. Don't omit details that might be referenced later.\n\n4. **Lookup hints**: For any detailed content (long lists, extensive data, specific conversations) that couldn't fit in the summary, note the topic and key terms that could be used to find it in message history later.\n\nWrite in first person as a factual record of what occurred. Be thorough and detailed - the goal is to preserve enough context that the recent messages make sense and important information isn't lost.\n\nKeep your summary under 250 words. Only output the summary." + }, + "prompt_acknowledgement": { + "type": "boolean", + "title": "Prompt Acknowledgement", + "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).", + "default": false + }, + "clip_chars": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Clip Chars", + "description": "The maximum length of the summary in characters. 
If none, no clipping is performed.", + "default": 50000 + }, + "mode": { + "type": "string", + "enum": ["all", "sliding_window"], + "title": "Mode", + "description": "The type of summarization technique to use.", + "default": "sliding_window" + }, + "sliding_window_percentage": { + "type": "number", + "title": "Sliding Window Percentage", + "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)." + } + }, + "type": "object", + "required": ["model"], + "title": "CompactionSettings", + "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." + }, + "ComparisonOperator": { + "type": "string", + "enum": ["eq", "gte", "lte"], + "title": "ComparisonOperator", + "description": "Comparison operators for filtering numeric values" + }, + "CompletionTokensDetails": { + "properties": { + "accepted_prediction_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Accepted Prediction Tokens" + }, + "audio_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Audio Tokens" + }, + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Reasoning Tokens" + }, + "rejected_prediction_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Rejected Prediction Tokens" + } + }, + "additionalProperties": true, + "type": "object", + "title": "CompletionTokensDetails", + "description": "Breakdown of tokens used in a completion."
+ }, + "CompletionUsage": { + "properties": { + "completion_tokens": { + "type": "integer", + "title": "Completion Tokens" + }, + "prompt_tokens": { + "type": "integer", + "title": "Prompt Tokens" + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens" + }, + "completion_tokens_details": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompletionTokensDetails" + }, + { + "type": "null" + } + ] + }, + "prompt_tokens_details": { + "anyOf": [ + { + "$ref": "#/components/schemas/PromptTokensDetails" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": true, + "type": "object", + "required": ["completion_tokens", "prompt_tokens", "total_tokens"], + "title": "CompletionUsage", + "description": "Usage statistics for the completion request." + }, + "ConditionalToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "conditional", + "title": "Type", + "default": "conditional" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + }, + "default_child": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Default Child", + "description": "The default child tool to be called. If None, any tool can be called." 
+ }, + "child_output_mapping": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Child Output Mapping", + "description": "The output case to check for mapping" + }, + "require_output_mapping": { + "type": "boolean", + "title": "Require Output Mapping", + "description": "Whether to throw an error when output doesn't match any case", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name", "child_output_mapping"], + "title": "ConditionalToolRule", + "description": "A ToolRule that conditionally maps to different child tools based on the output." + }, + "ConditionalToolRuleSchema": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name" + }, + "type": { + "type": "string", + "title": "Type" + }, + "default_child": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Default Child" + }, + "child_output_mapping": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Child Output Mapping" + }, + "require_output_mapping": { + "type": "boolean", + "title": "Require Output Mapping" + } + }, + "type": "object", + "required": [ + "tool_name", + "type", + "default_child", + "child_output_mapping", + "require_output_mapping" + ], + "title": "ConditionalToolRuleSchema" + }, + "ContextWindowOverview": { + "properties": { + "context_window_size_max": { + "type": "integer", + "title": "Context Window Size Max", + "description": "The maximum amount of tokens the context window can hold." + }, + "context_window_size_current": { + "type": "integer", + "title": "Context Window Size Current", + "description": "The current number of tokens in the context window." + }, + "num_messages": { + "type": "integer", + "title": "Num Messages", + "description": "The number of messages in the context window." 
+ }, + "num_archival_memory": { + "type": "integer", + "title": "Num Archival Memory", + "description": "The number of messages in the archival memory." + }, + "num_recall_memory": { + "type": "integer", + "title": "Num Recall Memory", + "description": "The number of messages in the recall memory." + }, + "num_tokens_external_memory_summary": { + "type": "integer", + "title": "Num Tokens External Memory Summary", + "description": "The number of tokens in the external memory summary (archival + recall metadata)." + }, + "external_memory_summary": { + "type": "string", + "title": "External Memory Summary", + "description": "The metadata summary of the external memory sources (archival + recall metadata)." + }, + "num_tokens_system": { + "type": "integer", + "title": "Num Tokens System", + "description": "The number of tokens in the system prompt." + }, + "system_prompt": { + "type": "string", + "title": "System Prompt", + "description": "The content of the system prompt." + }, + "num_tokens_core_memory": { + "type": "integer", + "title": "Num Tokens Core Memory", + "description": "The number of tokens in the core memory." + }, + "core_memory": { + "type": "string", + "title": "Core Memory", + "description": "The content of the core memory." + }, + "num_tokens_summary_memory": { + "type": "integer", + "title": "Num Tokens Summary Memory", + "description": "The number of tokens in the summary memory." + }, + "summary_memory": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Summary Memory", + "description": "The content of the summary memory." + }, + "num_tokens_functions_definitions": { + "type": "integer", + "title": "Num Tokens Functions Definitions", + "description": "The number of tokens in the functions definitions." 
+ }, + "functions_definitions": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/FunctionTool" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Functions Definitions", + "description": "The content of the functions definitions." + }, + "num_tokens_messages": { + "type": "integer", + "title": "Num Tokens Messages", + "description": "The number of tokens in the messages list." + }, + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "The messages in the context window." + } + }, + "type": "object", + "required": [ + "context_window_size_max", + "context_window_size_current", + "num_messages", + "num_archival_memory", + "num_recall_memory", + "num_tokens_external_memory_summary", + "external_memory_summary", + "num_tokens_system", + "system_prompt", + "num_tokens_core_memory", + "core_memory", + "num_tokens_summary_memory", + "num_tokens_functions_definitions", + "functions_definitions", + "num_tokens_messages", + "messages" + ], + "title": "ContextWindowOverview", + "description": "Overview of the context window, including the number of messages and tokens." + }, + "ContinueToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "continue_loop", + "title": "Type", + "default": "continue_loop" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name"], + "title": "ContinueToolRule", + "description": "Represents a tool rule configuration where if this tool gets called, it must continue the agent loop." 
+ }, + "Conversation": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "title": "Id", + "description": "The unique identifier of the conversation." + }, + "agent_id": { + "type": "string", + "title": "Agent Id", + "description": "The ID of the agent this conversation belongs to." + }, + "summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Summary", + "description": "A summary of the conversation." + }, + "in_context_message_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "In Context Message Ids", + "description": "The IDs of in-context messages for the conversation." + }, + "isolated_block_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Isolated Block Ids", + "description": "IDs of blocks that are isolated (specific to this conversation, overriding agent defaults)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["id", "agent_id"], + "title": "Conversation", + "description": "Represents a conversation on an agent for concurrent messaging." 
+ }, + "ConversationMessageRequest": { + "properties": { + "messages": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/MessageCreate" + }, + { + "$ref": "#/components/schemas/ApprovalCreate" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Messages", + "description": "The messages to be sent to the agent." + }, + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Input", + "description": "Syntactic sugar for a single user message. Equivalent to messages=[{'role': 'user', 'content': input}]." 
+ }, + "max_steps": { + "type": "integer", + "title": "Max Steps", + "description": "Maximum number of steps the agent should take to process the request.", + "default": 50 + }, + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": true, + "deprecated": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "send_message", + "deprecated": true + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "message", + "deprecated": true + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." + }, + "enable_thinking": { + "type": "string", + "title": "Enable Thinking", + "description": "If set to True, enables reasoning before responses or tool calls from the agent.", + "default": true, + "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. 
When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, + "override_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model", + "description": "Model handle to use for this request instead of the agent's default model. This allows sending a message to a different model without changing the agent's configuration." + }, + "streaming": { + "type": "boolean", + "title": "Streaming", + "description": "If True (default), returns a streaming response (Server-Sent Events). If False, returns a complete JSON response.", + "default": true + }, + "stream_tokens": { + "type": "boolean", + "title": "Stream Tokens", + "description": "Flag to determine if individual tokens should be streamed, rather than streaming per step (only used when streaming=true).", + "default": false + }, + "include_pings": { + "type": "boolean", + "title": "Include Pings", + "description": "Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts (only used when streaming=true).", + "default": true + }, + "background": { + "type": "boolean", + "title": "Background", + "description": "Whether to process the request in the background (only used when streaming=true).", + "default": false + } + }, + "type": "object", + "title": "ConversationMessageRequest", + "description": "Request for sending messages to a conversation. Streams by default." 
+ }, + "CoreMemoryBlockSchema": { + "properties": { + "created_at": { + "type": "string", + "title": "Created At" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "is_template": { + "type": "boolean", + "title": "Is Template" + }, + "label": { + "type": "string", + "title": "Label" + }, + "limit": { + "type": "integer", + "title": "Limit" + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name" + }, + "updated_at": { + "type": "string", + "title": "Updated At" + }, + "value": { + "type": "string", + "title": "Value" + } + }, + "type": "object", + "required": [ + "created_at", + "description", + "is_template", + "label", + "limit", + "template_name", + "updated_at", + "value" + ], + "title": "CoreMemoryBlockSchema" + }, + "CreateAgentRequest": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the agent." + }, + "memory_blocks": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/CreateBlock" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Memory Blocks", + "description": "The blocks to create in the agent's in-context memory." + }, + "tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools", + "description": "The tools used by the agent." + }, + "tool_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Ids", + "description": "The ids of the tools used by the agent." 
+ }, + "source_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Source Ids", + "description": "Deprecated: Use `folder_ids` field instead. The ids of the sources used by the agent.", + "deprecated": true + }, + "folder_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Folder Ids", + "description": "The ids of the folders used by the agent." + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The ids of the blocks used by the agent." + }, + "tool_rules": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ChildToolRule" + }, + { + "$ref": "#/components/schemas/InitToolRule" + }, + { + "$ref": "#/components/schemas/TerminalToolRule" + }, + { + "$ref": "#/components/schemas/ConditionalToolRule" + }, + { + "$ref": "#/components/schemas/ContinueToolRule" + }, + { + "$ref": "#/components/schemas/RequiredBeforeExitToolRule" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRule" + }, + { + "$ref": "#/components/schemas/ParentToolRule" + }, + { + "$ref": "#/components/schemas/RequiresApprovalToolRule" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "conditional": "#/components/schemas/ConditionalToolRule", + "constrain_child_tools": "#/components/schemas/ChildToolRule", + "continue_loop": "#/components/schemas/ContinueToolRule", + "exit_loop": "#/components/schemas/TerminalToolRule", + "max_count_per_step": "#/components/schemas/MaxCountPerStepToolRule", + "parent_last_tool": "#/components/schemas/ParentToolRule", + "required_before_exit": "#/components/schemas/RequiredBeforeExitToolRule", + "requires_approval": "#/components/schemas/RequiresApprovalToolRule", + "run_first": "#/components/schemas/InitToolRule" + } + } + }, + "type": "array" 
+ }, + { + "type": "null" + } + ], + "title": "Tool Rules", + "description": "The tool rules governing the agent." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the agent." + }, + "system": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System", + "description": "The system prompt used by the agent." + }, + "agent_type": { + "$ref": "#/components/schemas/AgentType", + "description": "The type of agent." + }, + "initial_message_sequence": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageCreate" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Initial Message Sequence", + "description": "The initial set of messages to put in the agent's in-context memory." + }, + "include_base_tools": { + "type": "boolean", + "title": "Include Base Tools", + "description": "If true, attaches the Letta core tools (e.g. core_memory related functions).", + "default": true + }, + "include_multi_agent_tools": { + "type": "boolean", + "title": "Include Multi Agent Tools", + "description": "If true, attaches the Letta multi-agent tools (e.g. sending a message to another agent).", + "default": false + }, + "include_base_tool_rules": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Base Tool Rules", + "description": "If true, attaches the Letta base tool rules (e.g. deny all tools not explicitly allowed)." + }, + "include_default_source": { + "type": "boolean", + "title": "Include Default Source", + "description": "If true, automatically creates and attaches a default data source for this agent.", + "default": false, + "deprecated": true + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the agent." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the agent." + }, + "llm_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LLMConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `model` field instead. The LLM configuration used by the agent.", + "deprecated": true + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", + "deprecated": true + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." 
+ }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings for the agent." 
+ }, + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "The compaction settings configuration used for compaction." + }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit used by the agent." + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "Deprecated: No longer used. The embedding chunk size used by the agent.", + "default": 300, + "deprecated": true + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.", + "deprecated": true + }, + "max_reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Reasoning Tokens", + "description": "Deprecated: Use `model` field to configure reasoning tokens instead. The maximum number of tokens to generate for reasoning step.", + "deprecated": true + }, + "enable_reasoner": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Reasoner", + "description": "Deprecated: Use `model` field to configure reasoning instead. Whether to enable internal extended thinking step for a reasoner model.", + "default": true, + "deprecated": true + }, + "reasoning": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Reasoning", + "description": "Deprecated: Use `model` field to configure reasoning instead. 
Whether to enable reasoning for this agent.", + "deprecated": true + }, + "from_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "From Template", + "description": "Deprecated: please use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "template": { + "type": "boolean", + "title": "Template", + "description": "Deprecated: No longer used.", + "default": false, + "deprecated": true + }, + "project": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project", + "description": "Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the SDK, this can be done via the x_project parameter.", + "deprecated": true + }, + "tool_exec_environment_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Tool Exec Environment Variables", + "description": "Deprecated: Use `secrets` field instead. Environment variables for tool execution.", + "deprecated": true + }, + "secrets": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Secrets", + "description": "The environment variables for tool execution specific to this agent." + }, + "memory_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Memory Variables", + "description": "Deprecated: Only relevant for creating agents from a template. Use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "Deprecated: No longer used. 
The id of the project the agent belongs to.", + "deprecated": true + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "Deprecated: No longer used. The id of the template the agent belongs to.", + "deprecated": true + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "Deprecated: No longer used. The base template id of the agent.", + "deprecated": true + }, + "identity_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Identity Ids", + "description": "The ids of the identities associated with this agent." + }, + "message_buffer_autoclear": { + "type": "boolean", + "title": "Message Buffer Autoclear", + "description": "If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.", + "default": false + }, + "enable_sleeptime": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Sleeptime", + "description": "If set to True, memory management will move to a background agent thread." 
+ }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The timezone of the agent (IANA format)." + }, + "max_files_open": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Files Open", + "description": "Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "per_file_view_window_char_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Per File View Window Char Limit", + "description": "The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "Deprecated: No longer used. If set to True, the agent will be hidden.", + "deprecated": true + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. 
If set to True, enables parallel tool calling.", + "deprecated": true + } + }, + "type": "object", + "title": "CreateAgentRequest", + "description": "CreateAgent model specifically for POST request body, excluding user_id which comes from headers" + }, + "CreateArchivalMemory": { + "properties": { + "text": { + "type": "string", + "title": "Text", + "description": "Text to write to archival memory." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Optional list of tags to attach to the memory." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "Optional timestamp for the memory (defaults to current UTC time)." + } + }, + "type": "object", + "required": ["text"], + "title": "CreateArchivalMemory" + }, + "CreateBatch": { + "properties": { + "requests": { + "items": { + "$ref": "#/components/schemas/LettaBatchRequest" + }, + "type": "array", + "title": "Requests", + "description": "List of requests to be processed in batch." + }, + "callback_url": { + "anyOf": [ + { + "type": "string", + "maxLength": 2083, + "minLength": 1, + "format": "uri" + }, + { + "type": "null" + } + ], + "title": "Callback Url", + "description": "Optional URL to call via POST when the batch completes. The callback payload will be a JSON object with the following fields: {'job_id': string, 'status': string, 'completed_at': string}. Where 'job_id' is the unique batch job identifier, 'status' is the final batch status (e.g., 'completed', 'failed'), and 'completed_at' is an ISO 8601 timestamp indicating when the batch job completed." + } + }, + "type": "object", + "required": ["requests"], + "title": "CreateBatch" + }, + "CreateBlock": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." 
+ }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the block." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "type": "string", + "title": "Label", + "description": "Label of the block." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags to associate with the block." + } + }, + "type": "object", + "required": ["value", "label"], + "title": "CreateBlock", + "description": "Create a block" + }, + "CreateConversation": { + "properties": { + "summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Summary", + "description": "A summary of the conversation." + }, + "isolated_block_labels": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Isolated Block Labels", + "description": "List of block labels that should be isolated (conversation-specific) rather than shared across conversations. New blocks will be created as copies of the agent's blocks with these labels." + } + }, + "type": "object", + "title": "CreateConversation", + "description": "Request model for creating a new conversation." 
+ }, + "CreateMCPServerRequest": { + "properties": { + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the MCP server" + }, + "config": { + "oneOf": [ + { + "$ref": "#/components/schemas/CreateStdioMCPServer" + }, + { + "$ref": "#/components/schemas/CreateSSEMCPServer" + }, + { + "$ref": "#/components/schemas/CreateStreamableHTTPMCPServer" + } + ], + "title": "Config", + "description": "The MCP server configuration (Stdio, SSE, or Streamable HTTP)", + "discriminator": { + "propertyName": "mcp_server_type", + "mapping": { + "sse": "#/components/schemas/CreateSSEMCPServer", + "stdio": "#/components/schemas/CreateStdioMCPServer", + "streamable_http": "#/components/schemas/CreateStreamableHTTPMCPServer" + } + } + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_name", "config"], + "title": "CreateMCPServerRequest", + "description": "Request to create a new MCP server with configuration." + }, + "CreateSSEMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "sse", + "title": "Mcp Server Type", + "default": "sse" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url"], + "title": "CreateSSEMCPServer", + 
"description": "Create a new SSE MCP server" + }, + "CreateStdioMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "stdio", + "title": "Mcp Server Type", + "default": "stdio" + }, + "command": { + "type": "string", + "title": "Command", + "description": "The command to run (MCP 'local' client will run this command)" + }, + "args": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Args", + "description": "The arguments to pass to the command" + }, + "env": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Env", + "description": "Environment variables to set" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["command", "args"], + "title": "CreateStdioMCPServer", + "description": "Create a new Stdio MCP server" + }, + "CreateStreamableHTTPMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "streamable_http", + "title": "Mcp Server Type", + "default": "streamable_http" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url"], + "title": "CreateStreamableHTTPMCPServer", + "description": "Create a new Streamable HTTP 
MCP server" + }, + "Custom-Input": { + "properties": { + "input": { + "type": "string", + "title": "Input" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["input", "name"], + "title": "Custom", + "description": "The custom tool that the model called." + }, + "Custom-Output": { + "properties": { + "input": { + "type": "string", + "title": "Input" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["input", "name"], + "title": "Custom", + "description": "The custom tool that the model called." + }, + "DeepseekModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "deepseek", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "deepseek" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." 
+ } + }, + "type": "object", + "title": "DeepseekModelSettings", + "description": "Deepseek model configuration (OpenAI-compatible)." + }, + "DeleteDeploymentResponse": { + "properties": { + "deleted_blocks": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Deleted Blocks", + "default": [] + }, + "deleted_agents": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Deleted Agents", + "default": [] + }, + "deleted_groups": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Deleted Groups", + "default": [] + }, + "message": { + "type": "string", + "title": "Message" + } + }, + "type": "object", + "required": ["message"], + "title": "DeleteDeploymentResponse", + "description": "Response model for delete deployment operation." + }, + "DeploymentEntity": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "type": { + "type": "string", + "title": "Type" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id" + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id" + } + }, + "type": "object", + "required": ["id", "type"], + "title": "DeploymentEntity", + "description": "A deployment entity." 
+ }, + "DuplicateFileHandling": { + "type": "string", + "enum": ["skip", "error", "suffix", "replace"], + "title": "DuplicateFileHandling", + "description": "How to handle duplicate filenames when uploading files" + }, + "DynamicManager": { + "properties": { + "manager_type": { + "type": "string", + "const": "dynamic", + "title": "Manager Type", + "description": "", + "default": "dynamic" + }, + "manager_agent_id": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "title": "Manager Agent Id", + "description": "", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "termination_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Termination Token", + "description": "", + "default": "DONE!" + }, + "max_turns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Turns", + "description": "" + } + }, + "type": "object", + "required": ["manager_agent_id"], + "title": "DynamicManager" + }, + "DynamicManagerUpdate": { + "properties": { + "manager_type": { + "type": "string", + "const": "dynamic", + "title": "Manager Type", + "description": "", + "default": "dynamic" + }, + "manager_agent_id": { + "anyOf": [ + { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + { + "type": "null" + } + ], + "title": "Manager Agent Id", + "description": "" + }, + "termination_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Termination Token", + "description": "" + }, + "max_turns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Turns", + "description": "" + } + }, + "type": "object", 
+ "title": "DynamicManagerUpdate" + }, + "E2BSandboxConfig": { + "properties": { + "timeout": { + "type": "integer", + "title": "Timeout", + "description": "Time limit for the sandbox (in seconds).", + "default": 300 + }, + "template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template", + "description": "The E2B template id (docker image)." + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "A list of pip packages to install on the E2B Sandbox" + } + }, + "type": "object", + "title": "E2BSandboxConfig" + }, + "EmbeddingConfig": { + "properties": { + "embedding_endpoint_type": { + "type": "string", + "enum": [ + "openai", + "anthropic", + "bedrock", + "google_ai", + "google_vertex", + "azure", + "groq", + "ollama", + "webui", + "webui-legacy", + "lmstudio", + "lmstudio-legacy", + "llamacpp", + "koboldcpp", + "vllm", + "hugging-face", + "mistral", + "together", + "pinecone" + ], + "title": "Embedding Endpoint Type", + "description": "The endpoint type for the model." + }, + "embedding_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding Endpoint", + "description": "The endpoint for the model (`None` if local)." + }, + "embedding_model": { + "type": "string", + "title": "Embedding Model", + "description": "The model for the embedding." + }, + "embedding_dim": { + "type": "integer", + "title": "Embedding Dim", + "description": "The dimension of the embedding." 
+ }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "The chunk size of the embedding.", + "default": 300 + }, + "handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Handle", + "description": "The handle for this config, in the format provider/model-name." + }, + "batch_size": { + "type": "integer", + "title": "Batch Size", + "description": "The maximum batch size for processing embeddings.", + "default": 32 + }, + "azure_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Endpoint", + "description": "The Azure endpoint for the model." + }, + "azure_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Version", + "description": "The Azure version for the model." + }, + "azure_deployment": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Deployment", + "description": "The Azure deployment for the model." + } + }, + "type": "object", + "required": [ + "embedding_endpoint_type", + "embedding_model", + "embedding_dim" + ], + "title": "EmbeddingConfig", + "description": "Configuration for embedding model connection and processing parameters." + }, + "EmbeddingModel": { + "properties": { + "handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Handle", + "description": "The handle for this config, in the format provider/model-name." 
+ }, + "name": { + "type": "string", + "title": "Name", + "description": "The actual model name used by the provider" + }, + "display_name": { + "type": "string", + "title": "Display Name", + "description": "Display name for the model shown in UI" + }, + "provider_type": { + "$ref": "#/components/schemas/ProviderType", + "description": "The type of the provider" + }, + "provider_name": { + "type": "string", + "title": "Provider Name", + "description": "The name of the provider" + }, + "model_type": { + "type": "string", + "const": "embedding", + "title": "Model Type", + "description": "Type of model (llm or embedding)", + "default": "embedding" + }, + "embedding_endpoint_type": { + "type": "string", + "enum": [ + "openai", + "anthropic", + "bedrock", + "google_ai", + "google_vertex", + "azure", + "groq", + "ollama", + "webui", + "webui-legacy", + "lmstudio", + "lmstudio-legacy", + "llamacpp", + "koboldcpp", + "vllm", + "hugging-face", + "mistral", + "together", + "pinecone" + ], + "title": "Embedding Endpoint Type", + "description": "Deprecated: Use 'provider_type' field instead. The endpoint type for the embedding model.", + "deprecated": true + }, + "embedding_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding Endpoint", + "description": "Deprecated: The endpoint for the model.", + "deprecated": true + }, + "embedding_model": { + "type": "string", + "title": "Embedding Model", + "description": "Deprecated: Use 'name' field instead. 
Embedding model name.", + "deprecated": true + }, + "embedding_dim": { + "type": "integer", + "title": "Embedding Dim", + "description": "The dimension of the embedding" + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "Deprecated: The chunk size of the embedding.", + "default": 300, + "deprecated": true + }, + "batch_size": { + "type": "integer", + "title": "Batch Size", + "description": "Deprecated: The maximum batch size for processing embeddings.", + "default": 32, + "deprecated": true + }, + "azure_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Endpoint", + "description": "Deprecated: The Azure endpoint for the model.", + "deprecated": true + }, + "azure_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Version", + "description": "Deprecated: The Azure version for the model.", + "deprecated": true + }, + "azure_deployment": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Azure Deployment", + "description": "Deprecated: The Azure deployment for the model.", + "deprecated": true + } + }, + "type": "object", + "required": [ + "name", + "display_name", + "provider_type", + "provider_name", + "embedding_endpoint_type", + "embedding_model", + "embedding_dim" + ], + "title": "EmbeddingModel" + }, + "EventMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "event", + "title": "Message Type", + "default": "event_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + 
{ + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "event_type": { + "type": "string", + "const": "compaction", + "title": "Event Type" + }, + "event_data": { + "additionalProperties": true, + "type": "object", + "title": "Event Data" + } + }, + "type": "object", + "required": ["id", "date", "event_type", "event_data"], + "title": "EventMessage", + "description": "A message for notifying the developer that an event that has occured (e.g. a compaction). Events are NOT part of the context window." + }, + "FeedbackType": { + "type": "string", + "enum": ["positive", "negative"], + "title": "FeedbackType" + }, + "File": { + "properties": { + "file": { + "$ref": "#/components/schemas/FileFile" + }, + "type": { + "type": "string", + "const": "file", + "title": "Type" + } + }, + "type": "object", + "required": ["file", "type"], + "title": "File", + "description": "Learn about [file inputs](https://platform.openai.com/docs/guides/text) for text generation." + }, + "FileAgentSchema": { + "properties": { + "agent_id": { + "type": "string", + "title": "Agent Id", + "description": "Unique identifier of the agent." + }, + "file_id": { + "type": "string", + "title": "File Id", + "description": "Unique identifier of the file." + }, + "source_id": { + "type": "string", + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. Unique identifier of the source.", + "deprecated": true + }, + "file_name": { + "type": "string", + "title": "File Name", + "description": "Name of the file." 
+ }, + "is_open": { + "type": "boolean", + "title": "Is Open", + "description": "True if the agent currently has the file open.", + "default": true + }, + "visible_content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Visible Content", + "description": "Portion of the file the agent is focused on (may be large)." + }, + "last_accessed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Accessed At", + "description": "UTC timestamp of the agent's most recent access to this file." + }, + "start_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Start Line", + "description": "Starting line number (1-indexed) when file was opened with line range." + }, + "end_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "End Line", + "description": "Ending line number (exclusive) when file was opened with line range." + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this file-agent relationship in the file" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["agent_id", "file_id", "source_id", "file_name", "id"], + "title": "FileAgentSchema", + "description": "File-Agent relationship with human-readable ID for agent file" + }, + "FileBlock": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." + }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." 
+ }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "description": "Whether the block is a template (e.g. saved human/persona options).", + "default": false + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the block." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Label", + "description": "Label of the block (e.g. 'human', 'persona') in the context window." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." + }, + "id": { + "type": "string", + "pattern": "^block-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Block", + "examples": ["block-123e4567-e89b-12d3-a456-426614174000"] + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Block." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that last updated this Block." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the block.", + "default": [] + }, + "file_id": { + "type": "string", + "title": "File Id", + "description": "Unique identifier of the file." + }, + "source_id": { + "type": "string", + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. Unique identifier of the source.", + "deprecated": true + }, + "is_open": { + "type": "boolean", + "title": "Is Open", + "description": "True if the agent currently has the file open." + }, + "last_accessed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Accessed At", + "description": "UTC timestamp of the agent’s most recent access to this file. Any operations from the open, close, or search tools will update this field." 
+ } + }, + "type": "object", + "required": ["value", "file_id", "source_id", "is_open"], + "title": "FileBlock" + }, + "FileFile": { + "properties": { + "file_data": { + "type": "string", + "title": "File Data" + }, + "file_id": { + "type": "string", + "title": "File Id" + }, + "filename": { + "type": "string", + "title": "Filename" + } + }, + "type": "object", + "title": "FileFile" + }, + "FileMetadata": { + "properties": { + "source_id": { + "type": "string", + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. The unique identifier of the source associated with the document.", + "deprecated": true + }, + "file_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Name", + "description": "The name of the file." + }, + "original_file_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Original File Name", + "description": "The original name of the file as uploaded." + }, + "file_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Path", + "description": "The path to the file." + }, + "file_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Type", + "description": "The type of the file (MIME type)." + }, + "file_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "File Size", + "description": "The size of the file in bytes." + }, + "file_creation_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Creation Date", + "description": "The creation date of the file." + }, + "file_last_modified_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Last Modified Date", + "description": "The last modified date of the file." 
+ }, + "processing_status": { + "$ref": "#/components/schemas/FileProcessingStatus", + "description": "The current processing status of the file (e.g. pending, parsing, embedding, completed, error).", + "default": "pending" + }, + "error_message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error Message", + "description": "Optional error message if the file failed processing." + }, + "total_chunks": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Chunks", + "description": "Total number of chunks for the file." + }, + "chunks_embedded": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Chunks Embedded", + "description": "Number of chunks that have been embedded." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Content", + "description": "Optional full-text content of the file; only populated on demand due to its size." + }, + "id": { + "type": "string", + "pattern": "^file-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the File", + "examples": ["file-123e4567-e89b-12d3-a456-426614174000"] + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The creation date of the file." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The update date of the file." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["source_id"], + "title": "FileMetadata", + "description": "Representation of a single FileMetadata" + }, + "FileProcessingStatus": { + "type": "string", + "enum": ["pending", "parsing", "embedding", "completed", "error"], + "title": "FileProcessingStatus" + }, + "FileSchema": { + "properties": { + "source_id": { + "type": "string", + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. The unique identifier of the source associated with the document.", + "deprecated": true + }, + "file_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Name", + "description": "The name of the file." + }, + "original_file_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Original File Name", + "description": "The original name of the file as uploaded." + }, + "file_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Path", + "description": "The path to the file." + }, + "file_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Type", + "description": "The type of the file (MIME type)." + }, + "file_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "File Size", + "description": "The size of the file in bytes." + }, + "file_creation_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Creation Date", + "description": "The creation date of the file." + }, + "file_last_modified_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Last Modified Date", + "description": "The last modified date of the file." + }, + "processing_status": { + "$ref": "#/components/schemas/FileProcessingStatus", + "description": "The current processing status of the file (e.g. 
pending, parsing, embedding, completed, error).", + "default": "pending" + }, + "error_message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error Message", + "description": "Optional error message if the file failed processing." + }, + "total_chunks": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Chunks", + "description": "Total number of chunks for the file." + }, + "chunks_embedded": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Chunks Embedded", + "description": "Number of chunks that have been embedded." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Content", + "description": "Optional full-text content of the file; only populated on demand due to its size." + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this file in the file" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["source_id", "id"], + "title": "FileSchema", + "description": "File with human-readable ID for agent file" + }, + "FileStats": { + "properties": { + "file_id": { + "type": "string", + "title": "File Id", + "description": "Unique identifier of the file" + }, + "file_name": { + "type": "string", + "title": "File Name", + "description": "Name of the file" + }, + "file_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "File Size", + "description": "Size of the file in bytes" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["file_id", "file_name"], + "title": "FileStats", + "description": "File statistics for metadata endpoint" + }, + "Folder": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the folder." 
+ }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the folder." + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions", + "description": "Instructions for how to use the folder." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata associated with the folder." + }, + "id": { + "type": "string", + "pattern": "^source-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Source", + "examples": ["source-123e4567-e89b-12d3-a456-426614174000"] + }, + "embedding_config": { + "$ref": "#/components/schemas/EmbeddingConfig", + "description": "The embedding configuration used by the folder." + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Tool." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this Tool." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the folder was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the folder was last updated." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["name", "embedding_config"], + "title": "Folder", + "description": "Representation of a folder, which is a collection of files and passages." 
+ }, + "Function-Output": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["arguments", "name"], + "title": "Function", + "description": "The function that the model called." + }, + "FunctionCall-Input": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["arguments", "name"], + "title": "FunctionCall", + "description": "Deprecated and replaced by `tool_calls`.\n\nThe name and arguments of a function that should be called, as generated by the model." + }, + "FunctionCall-Output": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["arguments", "name"], + "title": "FunctionCall", + "description": "Deprecated and replaced by `tool_calls`.\n\nThe name and arguments of a function that should be called, as generated by the model." 
+ }, + "FunctionDefinition": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Strict" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["name"], + "title": "FunctionDefinition" + }, + "FunctionTool": { + "properties": { + "function": { + "$ref": "#/components/schemas/FunctionDefinition" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["function", "type"], + "title": "FunctionTool" + }, + "GeminiThinkingConfig": { + "properties": { + "include_thoughts": { + "type": "boolean", + "title": "Include Thoughts", + "description": "Whether to include thoughts in the model's response.", + "default": true + }, + "thinking_budget": { + "type": "integer", + "title": "Thinking Budget", + "description": "The thinking budget for the model.", + "default": 1024 + } + }, + "type": "object", + "title": "GeminiThinkingConfig" + }, + "GenerateToolInput": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "Name of the tool to generate code for" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "User prompt to generate code" + }, + "handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Handle", + "description": "Handle of the tool to generate code for" + }, + "starter_code": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Starter Code", + "description": "Python source code to parse for JSON schema" + }, + "validation_errors": 
{ + "items": { + "type": "string" + }, + "type": "array", + "title": "Validation Errors", + "description": "List of validation errors" + } + }, + "type": "object", + "required": ["tool_name", "prompt", "validation_errors"], + "title": "GenerateToolInput" + }, + "GenerateToolOutput": { + "properties": { + "tool": { + "$ref": "#/components/schemas/Tool", + "description": "Generated tool" + }, + "sample_args": { + "additionalProperties": true, + "type": "object", + "title": "Sample Args", + "description": "Sample arguments for the tool" + }, + "response": { + "type": "string", + "title": "Response", + "description": "Response from the assistant" + } + }, + "type": "object", + "required": ["tool", "sample_args", "response"], + "title": "GenerateToolOutput" + }, + "GoogleAIModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 65536 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "google_ai", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "google_ai" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "thinking_config": { + "$ref": "#/components/schemas/GeminiThinkingConfig", + "description": "The thinking configuration for the model.", + "default": { + "include_thoughts": true, + "thinking_budget": 1024 + } + }, + "response_schema": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + 
"json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "The response schema for the model." + } + }, + "type": "object", + "title": "GoogleAIModelSettings" + }, + "GoogleVertexModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 65536 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "google_vertex", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "google_vertex" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "thinking_config": { + "$ref": "#/components/schemas/GeminiThinkingConfig", + "description": "The thinking configuration for the model.", + "default": { + "include_thoughts": true, + "thinking_budget": 1024 + } + }, + "response_schema": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "The response schema for the model." 
+ } + }, + "type": "object", + "title": "GoogleVertexModelSettings" + }, + "GroqModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "groq", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "groq" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "GroqModelSettings", + "description": "Groq model configuration (OpenAI-compatible)." + }, + "Group": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the group. Assigned by the database." 
+ }, + "manager_type": { + "$ref": "#/components/schemas/ManagerType", + "description": "" + }, + "agent_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Agent Ids", + "description": "" + }, + "description": { + "type": "string", + "title": "Description", + "description": "" + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id." + }, + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "shared_block_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Shared Block Ids", + "description": "", + "default": [], + "deprecated": true + }, + "manager_agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Manager Agent Id", + "description": "" + }, + "termination_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Termination Token", + "description": "" + }, + "max_turns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Turns", + "description": "" + }, + "sleeptime_agent_frequency": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sleeptime Agent Frequency", + "description": "" + }, + "turns_counter": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Turns Counter", + "description": "" + }, + "last_processed_message_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "title": "Last Processed Message Id", + "description": "" + }, + "max_message_buffer_length": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Message Buffer Length", + "description": "The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving." + }, + "min_message_buffer_length": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Min Message Buffer Length", + "description": "The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the group will be hidden." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["id", "manager_type", "agent_ids", "description"], + "title": "Group" + }, + "GroupCreate": { + "properties": { + "agent_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Agent Ids", + "description": "" + }, + "description": { + "type": "string", + "title": "Description", + "description": "" + }, + "manager_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/RoundRobinManager" + }, + { + "$ref": "#/components/schemas/SupervisorManager" + }, + { + "$ref": "#/components/schemas/DynamicManager" + }, + { + "$ref": "#/components/schemas/SleeptimeManager" + }, + { + "$ref": "#/components/schemas/VoiceSleeptimeManager" + } + ], + "title": "Manager Config", + "description": "", + "default": { + "manager_type": 
"round_robin" + }, + "discriminator": { + "propertyName": "manager_type", + "mapping": { + "dynamic": "#/components/schemas/DynamicManager", + "round_robin": "#/components/schemas/RoundRobinManager", + "sleeptime": "#/components/schemas/SleeptimeManager", + "supervisor": "#/components/schemas/SupervisorManager", + "voice_sleeptime": "#/components/schemas/VoiceSleeptimeManager" + } + } + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "shared_block_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Shared Block Ids", + "description": "", + "default": [], + "deprecated": true + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the group will be hidden." 
+ } + }, + "type": "object", + "required": ["agent_ids", "description"], + "title": "GroupCreate" + }, + "GroupSchema": { + "properties": { + "agent_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Agent Ids", + "description": "" + }, + "description": { + "type": "string", + "title": "Description", + "description": "" + }, + "manager_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/RoundRobinManager" + }, + { + "$ref": "#/components/schemas/SupervisorManager" + }, + { + "$ref": "#/components/schemas/DynamicManager" + }, + { + "$ref": "#/components/schemas/SleeptimeManager" + }, + { + "$ref": "#/components/schemas/VoiceSleeptimeManager" + } + ], + "title": "Manager Config", + "description": "", + "default": { + "manager_type": "round_robin" + }, + "discriminator": { + "propertyName": "manager_type", + "mapping": { + "dynamic": "#/components/schemas/DynamicManager", + "round_robin": "#/components/schemas/RoundRobinManager", + "sleeptime": "#/components/schemas/SleeptimeManager", + "supervisor": "#/components/schemas/SupervisorManager", + "voice_sleeptime": "#/components/schemas/VoiceSleeptimeManager" + } + } + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." 
+ }, + "shared_block_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Shared Block Ids", + "description": "", + "default": [], + "deprecated": true + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the group will be hidden." + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this group in the file" + } + }, + "type": "object", + "required": ["agent_ids", "description", "id"], + "title": "GroupSchema", + "description": "Group with human-readable ID for agent file" + }, + "GroupUpdate": { + "properties": { + "agent_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Agent Ids", + "description": "" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "" + }, + "manager_config": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/RoundRobinManagerUpdate" + }, + { + "$ref": "#/components/schemas/SupervisorManagerUpdate" + }, + { + "$ref": "#/components/schemas/DynamicManagerUpdate" + }, + { + "$ref": "#/components/schemas/SleeptimeManagerUpdate" + }, + { + "$ref": "#/components/schemas/VoiceSleeptimeManagerUpdate" + } + ], + "discriminator": { + "propertyName": "manager_type", + "mapping": { + "dynamic": 
"#/components/schemas/DynamicManagerUpdate", + "round_robin": "#/components/schemas/RoundRobinManagerUpdate", + "sleeptime": "#/components/schemas/SleeptimeManagerUpdate", + "supervisor": "#/components/schemas/SupervisorManagerUpdate", + "voice_sleeptime": "#/components/schemas/VoiceSleeptimeManagerUpdate" + } + } + }, + { + "type": "null" + } + ], + "title": "Manager Config", + "description": "" + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "shared_block_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Shared Block Ids", + "description": "", + "deprecated": true + } + }, + "type": "object", + "title": "GroupUpdate" + }, + "HTTPValidationError": { + "properties": { + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" + } + }, + "type": "object", + "title": "HTTPValidationError" + }, + "Health": { + "properties": { + "version": { + "type": "string", + "title": "Version" + }, + "status": { + "type": "string", + "title": "Status" + } + }, + "type": "object", + "required": ["version", "status"], + "title": "Health", + "description": "Health check response body" + }, + "HiddenReasoningMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "hidden_reasoning_message", + "title": "Message Type", + 
"description": "The type of the message.", + "default": "hidden_reasoning_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "state": { + "type": "string", + "enum": ["redacted", "omitted"], + "title": "State" + }, + "hidden_reasoning": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Hidden Reasoning" + } + }, + "type": "object", + "required": ["id", "date", "state"], + "title": "HiddenReasoningMessage", + "description": "Representation of an agent's internal reasoning where reasoning content\nhas been hidden from the response.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n state (Literal[\"redacted\", \"omitted\"]): Whether the reasoning\n content was redacted by the provider or simply omitted by the API\n hidden_reasoning (Optional[str]): The internal reasoning of the agent" + }, + "Identity": { + "properties": { + "id": { + "type": "string", + "pattern": "^identity-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Identity", + "examples": ["identity-123e4567-e89b-12d3-a456-426614174000"] + }, + "identifier_key": { + "type": "string", + "title": "Identifier Key", + "description": "External, user-generated identifier key of the identity." 
+ }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the identity." + }, + "identity_type": { + "$ref": "#/components/schemas/IdentityType", + "description": "The type of the identity." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project id of the identity, if applicable." + }, + "agent_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Agent Ids", + "description": "The IDs of the agents associated with the identity.", + "deprecated": true + }, + "block_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Block Ids", + "description": "The IDs of the blocks associated with the identity.", + "deprecated": true + }, + "properties": { + "items": { + "$ref": "#/components/schemas/IdentityProperty" + }, + "type": "array", + "title": "Properties", + "description": "List of properties associated with the identity" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "identifier_key", + "name", + "identity_type", + "agent_ids", + "block_ids" + ], + "title": "Identity" + }, + "IdentityCreate": { + "properties": { + "identifier_key": { + "type": "string", + "title": "Identifier Key", + "description": "External, user-generated identifier key of the identity." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the identity." + }, + "identity_type": { + "$ref": "#/components/schemas/IdentityType", + "description": "The type of the identity." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project id of the identity, if applicable." 
+ }, + "agent_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Agent Ids", + "description": "The agent ids that are associated with the identity.", + "deprecated": true + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The IDs of the blocks associated with the identity.", + "deprecated": true + }, + "properties": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/IdentityProperty" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Properties", + "description": "List of properties associated with the identity." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["identifier_key", "name", "identity_type"], + "title": "IdentityCreate" + }, + "IdentityProperty": { + "properties": { + "key": { + "type": "string", + "title": "Key", + "description": "The key of the property" + }, + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "additionalProperties": true, + "type": "object" + } + ], + "title": "Value", + "description": "The value of the property" + }, + "type": { + "$ref": "#/components/schemas/IdentityPropertyType", + "description": "The type of the property" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["key", "value", "type"], + "title": "IdentityProperty", + "description": "A property of an identity" + }, + "IdentityPropertyType": { + "type": "string", + "enum": ["string", "number", "boolean", "json"], + "title": "IdentityPropertyType", + "description": "Enum to represent the type of the identity property." + }, + "IdentityType": { + "type": "string", + "enum": ["org", "user", "other"], + "title": "IdentityType", + "description": "Enum to represent the type of the identity." + }, + "IdentityUpdate": { + "properties": { + "identifier_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Identifier Key", + "description": "External, user-generated identifier key of the identity." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the identity." + }, + "identity_type": { + "anyOf": [ + { + "$ref": "#/components/schemas/IdentityType" + }, + { + "type": "null" + } + ], + "description": "The type of the identity." 
+ }, + "agent_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Agent Ids", + "description": "The agent ids that are associated with the identity.", + "deprecated": true + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The IDs of the blocks associated with the identity.", + "deprecated": true + }, + "properties": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/IdentityProperty" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Properties", + "description": "List of properties associated with the identity." + } + }, + "additionalProperties": false, + "type": "object", + "title": "IdentityUpdate" + }, + "IdentityUpsert": { + "properties": { + "identifier_key": { + "type": "string", + "title": "Identifier Key", + "description": "External, user-generated identifier key of the identity." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the identity." + }, + "identity_type": { + "$ref": "#/components/schemas/IdentityType", + "description": "The type of the identity." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project id of the identity, if applicable." 
+ }, + "agent_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Agent Ids", + "description": "The agent ids that are associated with the identity.", + "deprecated": true + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The IDs of the blocks associated with the identity.", + "deprecated": true + }, + "properties": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/IdentityProperty" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Properties", + "description": "List of properties associated with the identity." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["identifier_key", "name", "identity_type"], + "title": "IdentityUpsert" + }, + "ImageContent": { + "properties": { + "type": { + "type": "string", + "const": "image", + "title": "Type", + "description": "The type of the message.", + "default": "image" + }, + "source": { + "oneOf": [ + { + "$ref": "#/components/schemas/UrlImage" + }, + { + "$ref": "#/components/schemas/Base64Image" + }, + { + "$ref": "#/components/schemas/LettaImage" + } + ], + "title": "Source", + "description": "The source of the image.", + "discriminator": { + "propertyName": "type", + "mapping": { + "base64": "#/components/schemas/Base64Image", + "letta": "#/components/schemas/LettaImage", + "url": "#/components/schemas/UrlImage" + } + } + } + }, + "type": "object", + "required": ["source"], + "title": "ImageContent" + }, + "ImageURL": { + "properties": { + "url": { + "type": "string", + "title": "Url" + }, + "detail": { + "type": "string", + "enum": ["auto", "low", "high"], + "title": "Detail" + } + }, + "type": "object", + "required": ["url"], + "title": "ImageURL" + }, + "ImportedAgentsResponse": { + "properties": { + "agent_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Agent Ids", + "description": "List of IDs of the imported agents" + } + }, + "type": "object", + "required": ["agent_ids"], + "title": "ImportedAgentsResponse", + "description": "Response model for imported agents" + }, + "InitToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "run_first", + "title": "Type", + "default": "run_first" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored). 
Rendering uses fast built-in formatting for performance." + }, + "args": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args", + "description": "Optional prefilled arguments for this tool. When present, these values will override any LLM-provided arguments with the same keys during invocation. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name"], + "title": "InitToolRule", + "description": "Represents the initial tool rule configuration." + }, + "InputAudio": { + "properties": { + "data": { + "type": "string", + "title": "Data" + }, + "format": { + "type": "string", + "enum": ["wav", "mp3"], + "title": "Format" + } + }, + "type": "object", + "required": ["data", "format"], + "title": "InputAudio" + }, + "InternalTemplateAgentCreate": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the agent." + }, + "memory_blocks": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/CreateBlock" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Memory Blocks", + "description": "The blocks to create in the agent's in-context memory." + }, + "tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools", + "description": "The tools used by the agent." + }, + "tool_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Ids", + "description": "The ids of the tools used by the agent." 
+ }, + "source_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Source Ids", + "description": "Deprecated: Use `folder_ids` field instead. The ids of the sources used by the agent.", + "deprecated": true + }, + "folder_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Folder Ids", + "description": "The ids of the folders used by the agent." + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The ids of the blocks used by the agent." + }, + "tool_rules": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ChildToolRule" + }, + { + "$ref": "#/components/schemas/InitToolRule" + }, + { + "$ref": "#/components/schemas/TerminalToolRule" + }, + { + "$ref": "#/components/schemas/ConditionalToolRule" + }, + { + "$ref": "#/components/schemas/ContinueToolRule" + }, + { + "$ref": "#/components/schemas/RequiredBeforeExitToolRule" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRule" + }, + { + "$ref": "#/components/schemas/ParentToolRule" + }, + { + "$ref": "#/components/schemas/RequiresApprovalToolRule" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "conditional": "#/components/schemas/ConditionalToolRule", + "constrain_child_tools": "#/components/schemas/ChildToolRule", + "continue_loop": "#/components/schemas/ContinueToolRule", + "exit_loop": "#/components/schemas/TerminalToolRule", + "max_count_per_step": "#/components/schemas/MaxCountPerStepToolRule", + "parent_last_tool": "#/components/schemas/ParentToolRule", + "required_before_exit": "#/components/schemas/RequiredBeforeExitToolRule", + "requires_approval": "#/components/schemas/RequiresApprovalToolRule", + "run_first": "#/components/schemas/InitToolRule" + } + } + }, + "type": "array" 
+ }, + { + "type": "null" + } + ], + "title": "Tool Rules", + "description": "The tool rules governing the agent." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the agent." + }, + "system": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System", + "description": "The system prompt used by the agent." + }, + "agent_type": { + "$ref": "#/components/schemas/AgentType", + "description": "The type of agent." + }, + "initial_message_sequence": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageCreate" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Initial Message Sequence", + "description": "The initial set of messages to put in the agent's in-context memory." + }, + "include_base_tools": { + "type": "boolean", + "title": "Include Base Tools", + "description": "If true, attaches the Letta core tools (e.g. core_memory related functions).", + "default": true + }, + "include_multi_agent_tools": { + "type": "boolean", + "title": "Include Multi Agent Tools", + "description": "If true, attaches the Letta multi-agent tools (e.g. sending a message to another agent).", + "default": false + }, + "include_base_tool_rules": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Base Tool Rules", + "description": "If true, attaches the Letta base tool rules (e.g. deny all tools not explicitly allowed)." + }, + "include_default_source": { + "type": "boolean", + "title": "Include Default Source", + "description": "If true, automatically creates and attaches a default data source for this agent.", + "default": false, + "deprecated": true + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the agent." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the agent." + }, + "llm_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LLMConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `model` field instead. The LLM configuration used by the agent.", + "deprecated": true + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", + "deprecated": true + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." 
+ }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings for the agent." 
+ }, + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "The compaction settings configuration used for compaction." + }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit used by the agent." + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "Deprecated: No longer used. The embedding chunk size used by the agent.", + "default": 300, + "deprecated": true + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.", + "deprecated": true + }, + "max_reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Reasoning Tokens", + "description": "Deprecated: Use `model` field to configure reasoning tokens instead. The maximum number of tokens to generate for reasoning step.", + "deprecated": true + }, + "enable_reasoner": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Reasoner", + "description": "Deprecated: Use `model` field to configure reasoning instead. Whether to enable internal extended thinking step for a reasoner model.", + "default": true, + "deprecated": true + }, + "reasoning": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Reasoning", + "description": "Deprecated: Use `model` field to configure reasoning instead. 
Whether to enable reasoning for this agent.", + "deprecated": true + }, + "from_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "From Template", + "description": "Deprecated: please use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "template": { + "type": "boolean", + "title": "Template", + "description": "Deprecated: No longer used.", + "default": false, + "deprecated": true + }, + "project": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project", + "description": "Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the SDK, this can be done via the x_project parameter.", + "deprecated": true + }, + "tool_exec_environment_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Tool Exec Environment Variables", + "description": "Deprecated: Use `secrets` field instead. Environment variables for tool execution.", + "deprecated": true + }, + "secrets": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Secrets", + "description": "The environment variables for tool execution specific to this agent." + }, + "memory_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Memory Variables", + "description": "Deprecated: Only relevant for creating agents from a template. Use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "Deprecated: No longer used. 
The id of the project the agent belongs to.", + "deprecated": true + }, + "template_id": { + "type": "string", + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "type": "string", + "title": "Base Template Id", + "description": "The id of the base template." + }, + "identity_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Identity Ids", + "description": "The ids of the identities associated with this agent." + }, + "message_buffer_autoclear": { + "type": "boolean", + "title": "Message Buffer Autoclear", + "description": "If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.", + "default": false + }, + "enable_sleeptime": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Sleeptime", + "description": "If set to True, memory management will move to a background agent thread." + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "Deprecated: Use `model_settings` field to configure response format instead. 
The response format for the agent.", + "deprecated": true + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The timezone of the agent (IANA format)." + }, + "max_files_open": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Files Open", + "description": "Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "per_file_view_window_char_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Per File View Window Char Limit", + "description": "The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "Deprecated: No longer used. If set to True, the agent will be hidden.", + "deprecated": true + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.", + "deprecated": true + }, + "deployment_id": { + "type": "string", + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "type": "string", + "title": "Entity Id", + "description": "The id of the entity within the template." + } + }, + "type": "object", + "required": [ + "template_id", + "base_template_id", + "deployment_id", + "entity_id" + ], + "title": "InternalTemplateAgentCreate", + "description": "Used for Letta Cloud" + }, + "InternalTemplateBlockCreate": { + "properties": { + "value": { + "type": "string", + "title": "Value", + "description": "Value of the block." 
+ }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Character limit of the block.", + "default": 20000 + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "template_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Name", + "description": "Name of the block if it is a template." + }, + "is_template": { + "type": "boolean", + "title": "Is Template", + "default": false + }, + "template_id": { + "type": "string", + "title": "Template Id", + "description": "The id of the template." + }, + "base_template_id": { + "type": "string", + "title": "Base Template Id", + "description": "The id of the base template." + }, + "deployment_id": { + "type": "string", + "title": "Deployment Id", + "description": "The id of the deployment." + }, + "entity_id": { + "type": "string", + "title": "Entity Id", + "description": "The id of the entity within the template." + }, + "preserve_on_migration": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Preserve On Migration", + "description": "Preserve the block on template migration.", + "default": false + }, + "label": { + "type": "string", + "title": "Label", + "description": "Label of the block." + }, + "read_only": { + "type": "boolean", + "title": "Read Only", + "description": "Whether the agent has read-only access to the block.", + "default": false + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of the block." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata of the block.", + "default": {} + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the block will be hidden." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags to associate with the block." + } + }, + "type": "object", + "required": [ + "value", + "template_id", + "base_template_id", + "deployment_id", + "entity_id", + "label" + ], + "title": "InternalTemplateBlockCreate", + "description": "Used for Letta Cloud" + }, + "InternalTemplateGroupCreate": { + "properties": { + "agent_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Agent Ids", + "description": "" + }, + "description": { + "type": "string", + "title": "Description", + "description": "" + }, + "manager_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/RoundRobinManager" + }, + { + "$ref": "#/components/schemas/SupervisorManager" + }, + { + "$ref": "#/components/schemas/DynamicManager" + }, + { + "$ref": "#/components/schemas/SleeptimeManager" + }, + { + "$ref": "#/components/schemas/VoiceSleeptimeManager" + } + ], + "title": "Manager Config", + "description": "", + "default": { + "manager_type": "round_robin" + }, + "discriminator": { + "propertyName": "manager_type", + "mapping": { + "dynamic": "#/components/schemas/DynamicManager", + "round_robin": "#/components/schemas/RoundRobinManager", + "sleeptime": 
"#/components/schemas/SleeptimeManager", + "supervisor": "#/components/schemas/SupervisorManager", + "voice_sleeptime": "#/components/schemas/VoiceSleeptimeManager" + } + } + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The associated project id." + }, + "shared_block_ids": { + "items": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^block-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the block in the format 'block-'", + "examples": ["block-123e4567-e89b-42d3-8456-426614174000"] + }, + "type": "array", + "title": "Shared Block Ids", + "description": "", + "default": [], + "deprecated": true + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the group will be hidden." + }, + "base_template_id": { + "type": "string", + "title": "Base Template Id", + "description": "The id of the base template." + }, + "template_id": { + "type": "string", + "title": "Template Id", + "description": "The id of the template." + }, + "deployment_id": { + "type": "string", + "title": "Deployment Id", + "description": "The id of the deployment." + } + }, + "type": "object", + "required": [ + "agent_ids", + "description", + "base_template_id", + "template_id", + "deployment_id" + ], + "title": "InternalTemplateGroupCreate", + "description": "Used for Letta Cloud" + }, + "Job": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." 
+ }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The unix timestamp of when the job was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "status": { + "$ref": "#/components/schemas/JobStatus", + "description": "The status of the job.", + "default": "created" + }, + "completed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Completed At", + "description": "The unix timestamp of when the job was completed." + }, + "stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The reason why the job was stopped." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the job." + }, + "job_type": { + "$ref": "#/components/schemas/JobType", + "description": "The type of the job.", + "default": "job" + }, + "background": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Background", + "description": "Whether the job was created in background mode." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The agent associated with this job/run." + }, + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Url", + "description": "If set, POST to this URL when the job completes." + }, + "callback_sent_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Callback Sent At", + "description": "Timestamp when the callback was last attempted." 
+ }, + "callback_status_code": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Callback Status Code", + "description": "HTTP status code returned by the callback endpoint." + }, + "callback_error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Error", + "description": "Optional error message from attempting to POST the callback endpoint." + }, + "ttft_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Ttft Ns", + "description": "Time to first token for a run in nanoseconds" + }, + "total_duration_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Duration Ns", + "description": "Total run duration in nanoseconds" + }, + "id": { + "type": "string", + "pattern": "^(job|run)-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Job", + "examples": ["job-123e4567-e89b-12d3-a456-426614174000"] + } + }, + "additionalProperties": false, + "type": "object", + "title": "Job", + "description": "Representation of offline jobs, used for tracking status of data loading tasks (involving parsing and embedding files)." + }, + "JobStatus": { + "type": "string", + "enum": [ + "created", + "running", + "completed", + "failed", + "pending", + "cancelled", + "expired" + ], + "title": "JobStatus", + "description": "Status of the job." + }, + "JobType": { + "type": "string", + "enum": ["job", "run", "batch"], + "title": "JobType" + }, + "JsonObjectResponseFormat": { + "properties": { + "type": { + "type": "string", + "const": "json_object", + "title": "Type", + "description": "The type of the response format.", + "default": "json_object" + } + }, + "type": "object", + "title": "JsonObjectResponseFormat", + "description": "Response format for JSON object responses." 
+ }, + "JsonSchemaResponseFormat": { + "properties": { + "type": { + "type": "string", + "const": "json_schema", + "title": "Type", + "description": "The type of the response format.", + "default": "json_schema" + }, + "json_schema": { + "additionalProperties": true, + "type": "object", + "title": "Json Schema", + "description": "The JSON schema of the response." + } + }, + "type": "object", + "required": ["json_schema"], + "title": "JsonSchemaResponseFormat", + "description": "Response format for JSON schema-based responses." + }, + "LLMConfig": { + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "LLM model name. " + }, + "display_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Display Name", + "description": "A human-friendly display name for the model." + }, + "model_endpoint_type": { + "type": "string", + "enum": [ + "openai", + "anthropic", + "google_ai", + "google_vertex", + "azure", + "groq", + "ollama", + "webui", + "webui-legacy", + "lmstudio", + "lmstudio-legacy", + "lmstudio-chatcompletions", + "llamacpp", + "koboldcpp", + "vllm", + "hugging-face", + "minimax", + "mistral", + "together", + "bedrock", + "deepseek", + "xai", + "zai", + "openrouter", + "chatgpt_oauth" + ], + "title": "Model Endpoint Type", + "description": "The endpoint type for the model." + }, + "model_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Endpoint", + "description": "The endpoint for the model." + }, + "provider_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Name", + "description": "The provider name for the model." + }, + "provider_category": { + "anyOf": [ + { + "$ref": "#/components/schemas/ProviderCategory" + }, + { + "type": "null" + } + ], + "description": "The provider category for the model." 
+ }, + "model_wrapper": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Wrapper", + "description": "The wrapper for the model." + }, + "context_window": { + "type": "integer", + "title": "Context Window", + "description": "The context window size for the model." + }, + "put_inner_thoughts_in_kwargs": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Put Inner Thoughts In Kwargs", + "description": "Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.", + "default": false + }, + "handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Handle", + "description": "The handle for this config, in the format provider/model-name." + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature to use when generating text with the model. A higher temperature will result in more random text.", + "default": 1 + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "The maximum number of tokens to generate. If not set, the model will use its default value." + }, + "enable_reasoner": { + "type": "boolean", + "title": "Enable Reasoner", + "description": "Whether or not the model should use extended thinking if it is a 'reasoning' style model", + "default": true + }, + "reasoning_effort": { + "anyOf": [ + { + "type": "string", + "enum": ["none", "minimal", "low", "medium", "high", "xhigh"] + }, + { + "type": "null" + } + ], + "title": "Reasoning Effort", + "description": "The reasoning effort to use when generating text reasoning models" + }, + "max_reasoning_tokens": { + "type": "integer", + "title": "Max Reasoning Tokens", + "description": "Configurable thinking budget for extended thinking. 
Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.", + "default": 0 + }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'." + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Frequency Penalty", + "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0." + }, + "compatibility_type": { + "anyOf": [ + { + "type": "string", + "enum": ["gguf", "mlx"] + }, + { + "type": "null" + } + ], + "title": "Compatibility Type", + "description": "The framework compatibility type for the model." + }, + "verbosity": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Verbosity", + "description": "Soft control for how verbose model output should be, used for GPT-5 models." + }, + "tier": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tier", + "description": "The cost tier for the model (cloud only)." + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. 
Defaults to False.", + "default": false, + "deprecated": true + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings." + }, + "strict": { + "type": "boolean", + "title": "Strict", + "description": "Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.", + "default": false + } + }, + "type": "object", + "required": ["model", "model_endpoint_type", "context_window"], + "title": "LLMConfig", + "description": "Configuration for Language Model (LLM) connection and generation parameters.\n\n.. deprecated::\n LLMConfig is deprecated and should not be used as an input or return type in API calls.\n Use the schemas in letta.schemas.model (ModelSettings, OpenAIModelSettings, etc.) instead.\n For conversion, use the _to_model() method or Model._from_llm_config() method." + }, + "LettaAsyncRequest": { + "properties": { + "messages": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/MessageCreate" + }, + { + "$ref": "#/components/schemas/ApprovalCreate" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Messages", + "description": "The messages to be sent to the agent." 
+ }, + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Input", + "description": "Syntactic sugar for a single user message. Equivalent to messages=[{'role': 'user', 'content': input}]." + }, + "max_steps": { + "type": "integer", + "title": "Max Steps", + "description": "Maximum number of steps the agent should take to process the request.", + "default": 50 + }, + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects. 
Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": true, + "deprecated": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "send_message", + "deprecated": true + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "message", + "deprecated": true + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." + }, + "enable_thinking": { + "type": "string", + "title": "Enable Thinking", + "description": "If set to True, enables reasoning before responses or tool calls from the agent.", + "default": true, + "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, + "override_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model", + "description": "Model handle to use for this request instead of the agent's default model. 
This allows sending a message to a different model without changing the agent's configuration." + }, + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Url", + "description": "Optional callback URL to POST to when the job completes" + } + }, + "type": "object", + "title": "LettaAsyncRequest" + }, + "LettaBatchMessages": { + "properties": { + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages" + } + }, + "type": "object", + "required": ["messages"], + "title": "LettaBatchMessages" + }, + "LettaBatchRequest": { + "properties": { + "messages": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/MessageCreate" + }, + { + "$ref": "#/components/schemas/ApprovalCreate" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Messages", + "description": "The messages to be sent to the agent." + }, + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": 
"#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Input", + "description": "Syntactic sugar for a single user message. Equivalent to messages=[{'role': 'user', 'content': input}]." + }, + "max_steps": { + "type": "integer", + "title": "Max Steps", + "description": "Maximum number of steps the agent should take to process the request.", + "default": 50 + }, + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": true, + "deprecated": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "send_message", + "deprecated": true + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "message", + "deprecated": true + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." 
+ }, + "enable_thinking": { + "type": "string", + "title": "Enable Thinking", + "description": "If set to True, enables reasoning before responses or tool calls from the agent.", + "default": true, + "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, + "override_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model", + "description": "Model handle to use for this request instead of the agent's default model. This allows sending a message to a different model without changing the agent's configuration." + }, + "agent_id": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "title": "Agent Id", + "description": "The ID of the agent to send this batch request for", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + } + }, + "type": "object", + "required": ["agent_id"], + "title": "LettaBatchRequest" + }, + "LettaErrorMessage": { + "properties": { + "message_type": { + "type": "string", + "const": "error_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "error_message" + }, + "run_id": { + "type": "string", + "title": "Run ID", + "description": "The ID of the run." + }, + "error_type": { + "type": "string", + "title": "Error Type", + "description": "The type of error." + }, + "message": { + "type": "string", + "title": "Message", + "description": "The error message." + }, + "detail": { + "type": "string", + "title": "Detail", + "description": "An optional error detail." 
+ }, + "seq_id": { + "type": "integer", + "title": "Seq ID", + "description": "The sequence ID for cursor-based pagination." + } + }, + "type": "object", + "required": ["message_type", "run_id", "error_type", "message"], + "title": "LettaErrorMessage", + "description": "Error messages are used to notify the client of an error that occurred during the agent's execution." + }, + "LettaImage": { + "properties": { + "type": { + "type": "string", + "const": "letta", + "title": "Type", + "description": "The source type for the image.", + "default": "letta" + }, + "file_id": { + "type": "string", + "title": "File Id", + "description": "The unique identifier of the image file persisted in storage." + }, + "media_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Media Type", + "description": "The media type for the image." + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "The base64 encoded image data." + }, + "detail": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Detail", + "description": "What level of detail to use when processing and understanding the image (low, high, or auto to let the model decide)" + } + }, + "type": "object", + "required": ["file_id"], + "title": "LettaImage" + }, + "LettaPing": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "ping", + "title": "Message Type", + "description": "The type of the message. 
Ping messages are a keep-alive to prevent SSE streams from timing out during long running requests.", + "default": "ping" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + } + }, + "type": "object", + "required": ["id", "date"], + "title": "LettaPing", + "description": "A ping message used as a keepalive to prevent SSE streams from timing out during long running requests.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format" + }, + "LettaRequest": { + "properties": { + "messages": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/MessageCreate" + }, + { + "$ref": "#/components/schemas/ApprovalCreate" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Messages", + "description": "The messages to be sent to the agent." 
+ }, + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Input", + "description": "Syntactic sugar for a single user message. Equivalent to messages=[{'role': 'user', 'content': input}]." + }, + "max_steps": { + "type": "integer", + "title": "Max Steps", + "description": "Maximum number of steps the agent should take to process the request.", + "default": 50 + }, + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects. 
Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": true, + "deprecated": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "send_message", + "deprecated": true + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "message", + "deprecated": true + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." + }, + "enable_thinking": { + "type": "string", + "title": "Enable Thinking", + "description": "If set to True, enables reasoning before responses or tool calls from the agent.", + "default": true, + "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, + "override_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model", + "description": "Model handle to use for this request instead of the agent's default model. 
This allows sending a message to a different model without changing the agent's configuration." + } + }, + "type": "object", + "title": "LettaRequest" + }, + "LettaRequestConfig": { + "properties": { + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects.", + "default": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool.", + "default": "send_message" + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool.", + "default": "message" + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." + } + }, + "type": "object", + "title": "LettaRequestConfig" + }, + "LettaResponse": { + "properties": { + "messages": { + "items": { + "$ref": "#/components/schemas/LettaMessageUnion" + }, + "type": "array", + "title": "Messages", + "description": "The messages returned by the agent." + }, + "stop_reason": { + "$ref": "#/components/schemas/LettaStopReason" + }, + "usage": { + "$ref": "#/components/schemas/LettaUsageStatistics", + "description": "The usage statistics of the agent." 
+ } + }, + "type": "object", + "required": ["messages", "stop_reason", "usage"], + "title": "LettaResponse", + "description": "Response object from an agent interaction, consisting of the new messages generated by the agent and usage statistics.\nThe type of the returned messages can be either `Message` or `LettaMessage`, depending on what was specified in the request.\n\nAttributes:\n messages (List[Union[Message, LettaMessage]]): The messages returned by the agent.\n usage (LettaUsageStatistics): The usage statistics" + }, + "LettaStopReason": { + "properties": { + "message_type": { + "type": "string", + "const": "stop_reason", + "title": "Message Type", + "description": "The type of the message.", + "default": "stop_reason" + }, + "stop_reason": { + "$ref": "#/components/schemas/StopReasonType", + "description": "The reason why execution stopped." + } + }, + "type": "object", + "required": ["stop_reason"], + "title": "LettaStopReason", + "description": "The stop reason from Letta indicating why agent loop stopped execution." + }, + "LettaStreamingRequest": { + "properties": { + "messages": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/MessageCreate" + }, + { + "$ref": "#/components/schemas/ApprovalCreate" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Messages", + "description": "The messages to be sent to the agent." 
+ }, + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Input", + "description": "Syntactic sugar for a single user message. Equivalent to messages=[{'role': 'user', 'content': input}]." + }, + "max_steps": { + "type": "integer", + "title": "Max Steps", + "description": "Maximum number of steps the agent should take to process the request.", + "default": 50 + }, + "use_assistant_message": { + "type": "boolean", + "title": "Use Assistant Message", + "description": "Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects. 
Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": true, + "deprecated": true + }, + "assistant_message_tool_name": { + "type": "string", + "title": "Assistant Message Tool Name", + "description": "The name of the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "send_message", + "deprecated": true + }, + "assistant_message_tool_kwarg": { + "type": "string", + "title": "Assistant Message Tool Kwarg", + "description": "The name of the message argument in the designated message tool. Still supported for legacy agent types, but deprecated for letta_v1_agent onward.", + "default": "message", + "deprecated": true + }, + "include_return_message_types": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Include Return Message Types", + "description": "Only return specified message types in the response. If `None` (default) returns all messages." + }, + "enable_thinking": { + "type": "string", + "title": "Enable Thinking", + "description": "If set to True, enables reasoning before responses or tool calls from the agent.", + "default": true, + "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, + "override_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Override Model", + "description": "Model handle to use for this request instead of the agent's default model. 
This allows sending a message to a different model without changing the agent's configuration." + }, + "streaming": { + "type": "boolean", + "title": "Streaming", + "description": "If True, returns a streaming response (Server-Sent Events). If False (default), returns a complete response.", + "default": false + }, + "stream_tokens": { + "type": "boolean", + "title": "Stream Tokens", + "description": "Flag to determine if individual tokens should be streamed, rather than streaming per step (only used when streaming=true).", + "default": false + }, + "include_pings": { + "type": "boolean", + "title": "Include Pings", + "description": "Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts (only used when streaming=true).", + "default": true + }, + "background": { + "type": "boolean", + "title": "Background", + "description": "Whether to process the request in the background (only used when streaming=true).", + "default": false + } + }, + "type": "object", + "title": "LettaStreamingRequest" + }, + "LettaStreamingResponse": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/LettaPing" + }, + { + "$ref": "#/components/schemas/LettaErrorMessage" + }, + { + "$ref": "#/components/schemas/LettaStopReason" + }, + { + "$ref": "#/components/schemas/LettaUsageStatistics" + } + ], + "title": "LettaStreamingResponse", + "description": "Streaming response type for Server-Sent Events (SSE) endpoints.\nEach 
event in the stream will be one of these types.", + "discriminator": { + "propertyName": "message_type", + "mapping": { + "approval_request_message": "#/components/schemas/ApprovalRequestMessage", + "approval_response_message": "#/components/schemas/ApprovalResponseMessage", + "assistant_message": "#/components/schemas/AssistantMessage", + "error_message": "#/components/schemas/LettaErrorMessage", + "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage", + "ping": "#/components/schemas/LettaPing", + "reasoning_message": "#/components/schemas/ReasoningMessage", + "stop_reason": "#/components/schemas/LettaStopReason", + "system_message": "#/components/schemas/SystemMessage", + "tool_call_message": "#/components/schemas/ToolCallMessage", + "tool_return_message": "#/components/schemas/ToolReturnMessage", + "usage_statistics": "#/components/schemas/LettaUsageStatistics", + "user_message": "#/components/schemas/UserMessage" + } + } + }, + "LettaUsageStatistics": { + "properties": { + "message_type": { + "type": "string", + "const": "usage_statistics", + "title": "Message Type", + "default": "usage_statistics" + }, + "completion_tokens": { + "type": "integer", + "title": "Completion Tokens", + "description": "The number of tokens generated by the agent.", + "default": 0 + }, + "prompt_tokens": { + "type": "integer", + "title": "Prompt Tokens", + "description": "The number of tokens in the prompt.", + "default": 0 + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens", + "description": "The total number of tokens processed by the agent.", + "default": 0 + }, + "step_count": { + "type": "integer", + "title": "Step Count", + "description": "The number of steps taken by the agent.", + "default": 0 + }, + "run_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Run Ids", + "description": "The background task run IDs associated with the agent interaction" + }, + 
"cached_input_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cached Input Tokens", + "description": "The number of input tokens served from cache. None if not reported by provider." + }, + "cache_write_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cache Write Tokens", + "description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider." + }, + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Reasoning Tokens", + "description": "The number of reasoning/thinking tokens generated. None if not reported by provider." + } + }, + "type": "object", + "title": "LettaUsageStatistics", + "description": "Usage statistics for the agent interaction.\n\nAttributes:\n completion_tokens (int): The number of tokens generated by the agent.\n prompt_tokens (int): The number of tokens in the prompt.\n total_tokens (int): The total number of tokens processed by the agent.\n step_count (int): The number of steps taken by the agent.\n cached_input_tokens (Optional[int]): The number of input tokens served from cache. None if not reported.\n cache_write_tokens (Optional[int]): The number of input tokens written to cache. None if not reported.\n reasoning_tokens (Optional[int]): The number of reasoning/thinking tokens generated. None if not reported." 
+ }, + "ListDeploymentEntitiesResponse": { + "properties": { + "entities": { + "items": { + "$ref": "#/components/schemas/DeploymentEntity" + }, + "type": "array", + "title": "Entities", + "default": [] + }, + "total_count": { + "type": "integer", + "title": "Total Count" + }, + "deployment_id": { + "type": "string", + "title": "Deployment Id" + }, + "message": { + "type": "string", + "title": "Message" + } + }, + "type": "object", + "required": ["total_count", "deployment_id", "message"], + "title": "ListDeploymentEntitiesResponse", + "description": "Response model for listing deployment entities." + }, + "LocalSandboxConfig": { + "properties": { + "sandbox_dir": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sandbox Dir", + "description": "Directory for the sandbox environment." + }, + "use_venv": { + "type": "boolean", + "title": "Use Venv", + "description": "Whether or not to use the venv, or run directly in the same run loop.", + "default": false + }, + "venv_name": { + "type": "string", + "title": "Venv Name", + "description": "The name for the venv in the sandbox directory. We first search for an existing venv with this name, otherwise, we make it from the requirements.txt.", + "default": "venv" + }, + "pip_requirements": { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array", + "title": "Pip Requirements", + "description": "List of pip packages to install with mandatory name and optional version following semantic versioning. This only is considered when use_venv is True." 
+ } + }, + "type": "object", + "title": "LocalSandboxConfig" + }, + "MCPServerSchema": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable MCP server ID" + }, + "server_type": { + "type": "string", + "title": "Server Type" + }, + "server_name": { + "type": "string", + "title": "Server Name" + }, + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Url" + }, + "stdio_config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Stdio Config" + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + } + }, + "type": "object", + "required": ["id", "server_type", "server_name"], + "title": "MCPServerSchema", + "description": "MCP server schema for agent files with remapped ID." + }, + "MCPServerType": { + "type": "string", + "enum": ["sse", "stdio", "streamable_http"], + "title": "MCPServerType" + }, + "MCPTool": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "inputSchema": { + "additionalProperties": true, + "type": "object", + "title": "Inputschema" + }, + "outputSchema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Outputschema" + }, + "annotations": { + "anyOf": [ + { + "$ref": "#/components/schemas/ToolAnnotations" + }, + { + "type": "null" + } + ] + }, + "_meta": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Meta" + }, + "health": { + "anyOf": [ + { + "$ref": "#/components/schemas/MCPToolHealth" + }, + { + "type": 
"null" + } + ], + "description": "Schema health status for OpenAI strict mode" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["name", "inputSchema"], + "title": "MCPTool", + "description": "A simple wrapper around MCP's tool definition (to avoid conflict with our own)" + }, + "MCPToolHealth": { + "properties": { + "status": { + "type": "string", + "title": "Status", + "description": "Schema health status: STRICT_COMPLIANT, NON_STRICT_ONLY, or INVALID" + }, + "reasons": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Reasons", + "description": "List of reasons for the health status" + } + }, + "type": "object", + "required": ["status"], + "title": "MCPToolHealth", + "description": "Health status for an MCP tool's schema." + }, + "ManagerType": { + "type": "string", + "enum": [ + "round_robin", + "supervisor", + "dynamic", + "sleeptime", + "voice_sleeptime", + "swarm" + ], + "title": "ManagerType" + }, + "MaxCountPerStepToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "max_count_per_step", + "title": "Type", + "default": "max_count_per_step" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + }, + "max_count_limit": { + "type": "integer", + "title": "Max Count Limit", + "description": "The max limit for the total number of times this tool can be invoked in a single step." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name", "max_count_limit"], + "title": "MaxCountPerStepToolRule", + "description": "Represents a tool rule configuration which constrains the total number of times this tool can be invoked in a single step." 
+ }, + "MaxCountPerStepToolRuleSchema": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name" + }, + "type": { + "type": "string", + "title": "Type" + }, + "max_count_limit": { + "type": "integer", + "title": "Max Count Limit" + } + }, + "type": "object", + "required": ["tool_name", "type", "max_count_limit"], + "title": "MaxCountPerStepToolRuleSchema" + }, + "Memory": { + "properties": { + "agent_type": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentType" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Type", + "description": "Agent type controlling prompt rendering." + }, + "blocks": { + "items": { + "$ref": "#/components/schemas/Block" + }, + "type": "array", + "title": "Blocks", + "description": "Memory blocks contained in the agent's in-context memory" + }, + "file_blocks": { + "items": { + "$ref": "#/components/schemas/FileBlock" + }, + "type": "array", + "title": "File Blocks", + "description": "Special blocks representing the agent's in-context memory of an attached file" + }, + "prompt_template": { + "type": "string", + "title": "Prompt Template", + "description": "Deprecated. Ignored for performance.", + "default": "" + } + }, + "type": "object", + "required": ["blocks"], + "title": "Memory", + "description": "Represents the in-context memory (i.e. Core memory) of the agent. This includes both the `Block` objects (labelled by sections), as well as tools to edit the blocks." + }, + "Message": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." 
+ }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "pattern": "^message-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Message", + "examples": ["message-123e4567-e89b-12d3-a456-426614174000"] + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent." + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model used to make the function call." + }, + "role": { + "$ref": "#/components/schemas/MessageRole", + "description": "The role of the participant." 
+ }, + "content": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + }, + { + "$ref": "#/components/schemas/SummarizedReasoningContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "summarized_reasoning": "#/components/schemas/SummarizedReasoningContent", + "text": "#/components/schemas/TextContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolReturnContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Content", + "description": "The content of the message." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "For role user/assistant: the (optional) name of the participant. For role tool/function: the name of the function called." + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionMessageFunctionToolCall-Output" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Calls", + "description": "The list of tool calls requested. Only applicable for role assistant." + }, + "tool_call_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tool Call Id", + "description": "The ID of the tool call. Only applicable for role tool." 
+ }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id", + "description": "The id of the step that this message was created in." + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id", + "description": "The id of the run that this message was created in." + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid", + "description": "The offline threading id associated with this message" + }, + "tool_returns": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/letta__schemas__message__ToolReturn-Output" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Returns", + "description": "Tool execution return information for prior tool calls" + }, + "group_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Group Id", + "description": "The multi-agent group that the message was sent in" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id", + "description": "The id of the sender of the message, can be an identity id or agent id" + }, + "batch_item_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Batch Item Id", + "description": "The id of the LLMBatchItem that this message is associated with" + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The conversation this message belongs to" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err", + "description": "Whether this message is part of an error step. Used only for debugging purposes." 
+ }, + "approval_request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Approval Request Id", + "description": "The id of the approval request if this message is associated with a tool call request." + }, + "approve": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Approve", + "description": "Whether tool call is approved." + }, + "denial_reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Denial Reason", + "description": "The reason the tool call request was denied." + }, + "approvals": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ApprovalReturn" + }, + { + "$ref": "#/components/schemas/letta__schemas__message__ToolReturn-Output" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Approvals", + "description": "The list of approvals for this message." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["role"], + "title": "Message", + "description": " Letta's internal representation of a message. 
Includes methods to convert to/from LLM provider formats.\n\n Attributes:\n id (str): The unique identifier of the message.\n role (MessageRole): The role of the participant.\n text (str): The text of the message.\n user_id (str): The unique identifier of the user.\n agent_id (str): The unique identifier of the agent.\n model (str): The model used to make the function call.\n name (str): The name of the participant.\n created_at (datetime): The time the message was created.\n tool_calls (List[OpenAIToolCall,]): The list of tool calls requested.\n tool_call_id (str): The id of the tool call.\n step_id (str): The id of the step that this message was created in.\n otid (str): The offline threading id associated with this message.\n tool_returns (List[ToolReturn]): The list of tool returns requested.\n group_id (str): The multi-agent group that the message was sent in.\n sender_id (str): The id of the sender of the message, can be an identity id or agent id.\n conversation_id (str): The conversation this message belongs to." + }, + "MessageCreate": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "message" + }, + { + "type": "null" + } + ], + "title": "Type", + "description": "The message type to be created.", + "default": "message" + }, + "role": { + "type": "string", + "enum": ["user", "system", "assistant"], + "title": "Role", + "description": "The role of the participant." + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The content of the message." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the participant."
+ }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid", + "description": "The offline threading id associated with this message" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id", + "description": "The id of the sender of the message, can be an identity id or agent id" + }, + "batch_item_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Batch Item Id", + "description": "The id of the LLMBatchItem that this message is associated with" + }, + "group_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Group Id", + "description": "The multi-agent group that the message was sent in" + } + }, + "type": "object", + "required": ["role", "content"], + "title": "MessageCreate", + "description": "Request to create a message" + }, + "MessageRole": { + "type": "string", + "enum": ["assistant", "user", "tool", "function", "system", "approval"], + "title": "MessageRole" + }, + "MessageSearchRequest": { + "properties": { + "query": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Query", + "description": "Text query for full-text search" + }, + "search_mode": { + "type": "string", + "enum": ["vector", "fts", "hybrid"], + "title": "Search Mode", + "description": "Search mode to use", + "default": "hybrid" + }, + "roles": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageRole" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Roles", + "description": "Filter messages by role" + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "Filter messages by agent ID" + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "Filter messages by project ID" + }, + 
"template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "Filter messages by template ID" + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "Filter messages by conversation ID" + }, + "limit": { + "type": "integer", + "maximum": 100, + "minimum": 1, + "title": "Limit", + "description": "Maximum number of results to return", + "default": 50 + }, + "start_date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Start Date", + "description": "Filter messages created after this date" + }, + "end_date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "End Date", + "description": "Filter messages created on or before this date" + } + }, + "type": "object", + "title": "MessageSearchRequest", + "description": "Request model for searching messages across the organization" + }, + "MessageSearchResult": { + "properties": { + "embedded_text": { + "type": "string", + "title": "Embedded Text", + "description": "The embedded content (LLM-friendly)" + }, + "message": { + "$ref": "#/components/schemas/Message", + "description": "The raw message object" + }, + "fts_rank": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Fts Rank", + "description": "Full-text search rank position if FTS was used" + }, + "vector_rank": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Vector Rank", + "description": "Vector search rank position if vector search was used" + }, + "rrf_score": { + "type": "number", + "title": "Rrf Score", + "description": "Reciprocal Rank Fusion combined score" + } + }, + "type": "object", + "required": ["embedded_text", "message", "rrf_score"], + "title": "MessageSearchResult", + "description": "Result from a 
message search operation with scoring details." + }, + "MessageType": { + "type": "string", + "enum": [ + "system_message", + "user_message", + "assistant_message", + "reasoning_message", + "hidden_reasoning_message", + "tool_call_message", + "tool_return_message", + "approval_request_message", + "approval_response_message" + ], + "title": "MessageType" + }, + "ModalSandboxConfig": { + "properties": { + "timeout": { + "type": "integer", + "title": "Timeout", + "description": "Time limit for the sandbox (in seconds).", + "default": 60 + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "A list of pip packages to install in the Modal sandbox" + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "A list of npm packages to install in the Modal sandbox" + }, + "language": { + "type": "string", + "enum": ["python", "typescript"], + "title": "Language", + "default": "python" + } + }, + "type": "object", + "title": "ModalSandboxConfig" + }, + "Model": { + "properties": { + "handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Handle", + "description": "The handle for this config, in the format provider/model-name." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The actual model name used by the provider" + }, + "display_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Display Name", + "description": "A human-friendly display name for the model." 
+ }, + "provider_type": { + "$ref": "#/components/schemas/ProviderType", + "description": "The type of the provider" + }, + "provider_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Name", + "description": "The provider name for the model." + }, + "model_type": { + "type": "string", + "const": "llm", + "title": "Model Type", + "description": "Type of model (llm or embedding)", + "default": "llm" + }, + "model": { + "type": "string", + "title": "Model", + "description": "Deprecated: Use 'name' field instead. LLM model name.", + "deprecated": true + }, + "model_endpoint_type": { + "type": "string", + "enum": [ + "openai", + "anthropic", + "google_ai", + "google_vertex", + "azure", + "groq", + "ollama", + "webui", + "webui-legacy", + "lmstudio", + "lmstudio-legacy", + "lmstudio-chatcompletions", + "llamacpp", + "koboldcpp", + "vllm", + "hugging-face", + "minimax", + "mistral", + "together", + "bedrock", + "deepseek", + "xai", + "zai", + "openrouter", + "chatgpt_oauth" + ], + "title": "Model Endpoint Type", + "description": "Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", + "deprecated": true + }, + "model_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Endpoint", + "description": "Deprecated: The endpoint for the model.", + "deprecated": true + }, + "provider_category": { + "anyOf": [ + { + "$ref": "#/components/schemas/ProviderCategory" + }, + { + "type": "null" + } + ], + "description": "Deprecated: The provider category for the model.", + "deprecated": true + }, + "model_wrapper": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Wrapper", + "description": "Deprecated: The wrapper for the model.", + "deprecated": true + }, + "context_window": { + "type": "integer", + "title": "Context Window", + "description": "Deprecated: Use 'max_context_window' field instead. 
The context window size for the model.", + "deprecated": true + }, + "put_inner_thoughts_in_kwargs": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Put Inner Thoughts In Kwargs", + "description": "Deprecated: Puts 'inner_thoughts' as a kwarg in the function call.", + "default": true, + "deprecated": true + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "Deprecated: The temperature to use when generating text with the model.", + "default": 0.7, + "deprecated": true + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Deprecated: The maximum number of tokens to generate.", + "deprecated": true + }, + "enable_reasoner": { + "type": "boolean", + "title": "Enable Reasoner", + "description": "Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.", + "default": true, + "deprecated": true + }, + "reasoning_effort": { + "anyOf": [ + { + "type": "string", + "enum": ["none", "minimal", "low", "medium", "high", "xhigh"] + }, + { + "type": "null" + } + ], + "title": "Reasoning Effort", + "description": "Deprecated: The reasoning effort to use when generating text reasoning models.", + "deprecated": true + }, + "max_reasoning_tokens": { + "type": "integer", + "title": "Max Reasoning Tokens", + "description": "Deprecated: Configurable thinking budget for extended thinking.", + "default": 0, + "deprecated": true + }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'." 
+ }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Frequency Penalty", + "description": "Deprecated: Positive values penalize new tokens based on their existing frequency in the text so far.", + "deprecated": true + }, + "compatibility_type": { + "anyOf": [ + { + "type": "string", + "enum": ["gguf", "mlx"] + }, + { + "type": "null" + } + ], + "title": "Compatibility Type", + "description": "Deprecated: The framework compatibility type for the model.", + "deprecated": true + }, + "verbosity": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Verbosity", + "description": "Deprecated: Soft control for how verbose model output should be.", + "deprecated": true + }, + "tier": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tier", + "description": "Deprecated: The cost tier for the model (cloud only).", + "deprecated": true + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: If set to True, enables parallel tool calling.", + "default": false, + "deprecated": true + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings." 
+ }, + "strict": { + "type": "boolean", + "title": "Strict", + "description": "Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.", + "default": false + }, + "max_context_window": { + "type": "integer", + "title": "Max Context Window", + "description": "The maximum context window for the model" + } + }, + "type": "object", + "required": [ + "name", + "provider_type", + "model", + "model_endpoint_type", + "context_window", + "max_context_window" + ], + "title": "Model" + }, + "ModifyApprovalRequest": { + "properties": { + "requires_approval": { + "type": "boolean", + "title": "Requires Approval", + "description": "Whether the tool requires approval before execution" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["requires_approval"], + "title": "ModifyApprovalRequest", + "description": "Request body for modifying tool approval requirements." + }, + "ModifyFeedbackRequest": { + "properties": { + "feedback": { + "anyOf": [ + { + "$ref": "#/components/schemas/FeedbackType" + }, + { + "type": "null" + } + ], + "description": "Whether this feedback is positive or negative" + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Feedback tags to add to the step" + } + }, + "type": "object", + "title": "ModifyFeedbackRequest" + }, + "NpmRequirement": { + "properties": { + "name": { + "type": "string", + "minLength": 1, + "title": "Name", + "description": "Name of the npm package." + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Version", + "description": "Optional version of the package, following semantic versioning." 
+ } + }, + "type": "object", + "required": ["name"], + "title": "NpmRequirement" + }, + "OmittedReasoningContent": { + "properties": { + "type": { + "type": "string", + "const": "omitted_reasoning", + "title": "Type", + "description": "Indicates this is an omitted reasoning step.", + "default": "omitted_reasoning" + }, + "signature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Signature", + "description": "A unique identifier for this reasoning step." + } + }, + "type": "object", + "title": "OmittedReasoningContent", + "description": "A placeholder for reasoning content we know is present, but isn't returned by the provider (e.g. OpenAI GPT-5 on ChatCompletions)" + }, + "OpenAIModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "openai", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "openai" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "reasoning": { + "$ref": "#/components/schemas/OpenAIReasoning", + "description": "The reasoning configuration for the model.", + "default": { + "reasoning_effort": "high" + } + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": 
"#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + }, + "strict": { + "type": "boolean", + "title": "Strict", + "description": "Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.", + "default": true + } + }, + "type": "object", + "title": "OpenAIModelSettings" + }, + "OpenAIReasoning": { + "properties": { + "reasoning_effort": { + "type": "string", + "enum": ["none", "minimal", "low", "medium", "high", "xhigh"], + "title": "Reasoning Effort", + "description": "The reasoning effort to use when generating text reasoning models", + "default": "minimal" + } + }, + "type": "object", + "title": "OpenAIReasoning" + }, + "OpenRouterModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "openrouter", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "openrouter" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": 
"#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "OpenRouterModelSettings", + "description": "OpenRouter model configuration (OpenAI-compatible)." + }, + "Organization": { + "properties": { + "id": { + "type": "string", + "pattern": "^org-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Org", + "examples": ["org-123e4567-e89b-12d3-a456-426614174000"] + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the organization.", + "default": "SincereYogurt" + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The creation date of the organization." + }, + "privileged_tools": { + "type": "boolean", + "title": "Privileged Tools", + "description": "Whether the organization has access to privileged tools.", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "title": "Organization" + }, + "OrganizationCreate": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the organization." 
+ }, + "privileged_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Privileged Tools", + "description": "Whether the organization has access to privileged tools.", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "title": "OrganizationCreate" + }, + "OrganizationSourcesStats": { + "properties": { + "total_sources": { + "type": "integer", + "title": "Total Sources", + "description": "Total number of sources", + "default": 0 + }, + "total_files": { + "type": "integer", + "title": "Total Files", + "description": "Total number of files across all sources", + "default": 0 + }, + "total_size": { + "type": "integer", + "title": "Total Size", + "description": "Total size of all files in bytes", + "default": 0 + }, + "sources": { + "items": { + "$ref": "#/components/schemas/SourceStats" + }, + "type": "array", + "title": "Sources", + "description": "List of source metadata" + } + }, + "additionalProperties": false, + "type": "object", + "title": "OrganizationSourcesStats", + "description": "Complete metadata response for organization sources" + }, + "OrganizationUpdate": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the organization." 
+ }, + "privileged_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Privileged Tools", + "description": "Whether the organization has access to privileged tools.", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "title": "OrganizationUpdate" + }, + "PaginatedAgentFiles": { + "properties": { + "files": { + "items": { + "$ref": "#/components/schemas/AgentFileAttachment" + }, + "type": "array", + "title": "Files", + "description": "List of file attachments for the agent" + }, + "next_cursor": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Next Cursor", + "description": "Cursor for fetching the next page (file-agent relationship ID)" + }, + "has_more": { + "type": "boolean", + "title": "Has More", + "description": "Whether more results exist after this page" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["files", "has_more"], + "title": "PaginatedAgentFiles", + "description": "Paginated response for agent files" + }, + "ParameterProperties": { + "properties": { + "type": { + "type": "string", + "title": "Type" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + } + }, + "type": "object", + "required": ["type"], + "title": "ParameterProperties" + }, + "ParametersSchema": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Type", + "default": "object" + }, + "properties": { + "additionalProperties": { + "$ref": "#/components/schemas/ParameterProperties" + }, + "type": "object", + "title": "Properties" + }, + "required": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Required" + } + }, + "type": "object", + "required": ["properties"], + "title": "ParametersSchema" + }, + "ParentToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + 
"description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "parent_last_tool", + "title": "Type", + "default": "parent_last_tool" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + }, + "children": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Children", + "description": "The children tools that can be invoked." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name", "children"], + "title": "ParentToolRule", + "description": "A ToolRule that only allows a child tool to be called if the parent has been called." + }, + "Passage": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The creation date of the passage." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "is_deleted": { + "type": "boolean", + "title": "Is Deleted", + "description": "Whether this passage is deleted or not.", + "default": false + }, + "archive_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Archive Id", + "description": "The unique identifier of the archive containing this passage." 
+ }, + "source_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. The data source of the passage.", + "deprecated": true + }, + "file_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Id", + "description": "The unique identifier of the file associated with the passage." + }, + "file_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Name", + "description": "The name of the file (only for source passages)." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the passage.", + "default": {} + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Tags associated with this passage." + }, + "id": { + "type": "string", + "pattern": "^passage-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Passage", + "examples": ["passage-123e4567-e89b-12d3-a456-426614174000"] + }, + "text": { + "type": "string", + "title": "Text", + "description": "The text of the passage." + }, + "embedding": { + "anyOf": [ + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding of the passage." + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "The embedding configuration used by the passage." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["text", "embedding", "embedding_config"], + "title": "Passage", + "description": "Representation of a passage, which is stored in archival memory." 
+ }, + "PassageBatchCreateRequest": { + "properties": { + "passages": { + "items": { + "$ref": "#/components/schemas/PassageCreateRequest" + }, + "type": "array", + "title": "Passages", + "description": "Passages to create in the archive" + } + }, + "type": "object", + "required": ["passages"], + "title": "PassageBatchCreateRequest", + "description": "Request model for creating multiple passages in an archive." + }, + "PassageCreateRequest": { + "properties": { + "text": { + "type": "string", + "title": "Text", + "description": "The text content of the passage" + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Optional metadata for the passage" + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Optional tags for categorizing the passage" + }, + "created_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "Optional creation datetime for the passage (ISO 8601 format)" + } + }, + "type": "object", + "required": ["text"], + "title": "PassageCreateRequest", + "description": "Request model for creating a passage in an archive." 
+ }, + "PassageSearchRequest": { + "properties": { + "query": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Query", + "description": "Text query for semantic search" + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "Filter passages by agent ID" + }, + "archive_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Archive Id", + "description": "Filter passages by archive ID" + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Optional list of tags to filter search results" + }, + "tag_match_mode": { + "type": "string", + "enum": ["any", "all"], + "title": "Tag Match Mode", + "description": "How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags", + "default": "any" + }, + "limit": { + "type": "integer", + "maximum": 100, + "minimum": 1, + "title": "Limit", + "description": "Maximum number of results to return", + "default": 50 + }, + "start_date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Start Date", + "description": "Filter results to passages created after this datetime" + }, + "end_date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "End Date", + "description": "Filter results to passages created before this datetime" + } + }, + "type": "object", + "title": "PassageSearchRequest", + "description": "Request model for searching passages across archives." 
+ }, + "PassageSearchResult": { + "properties": { + "passage": { + "$ref": "#/components/schemas/Passage", + "description": "The passage object" + }, + "score": { + "type": "number", + "title": "Score", + "description": "Relevance score" + }, + "metadata": { + "additionalProperties": true, + "type": "object", + "title": "Metadata", + "description": "Additional metadata about the search result" + } + }, + "type": "object", + "required": ["passage", "score"], + "title": "PassageSearchResult", + "description": "Result from a passage search operation with scoring details." + }, + "PipRequirement": { + "properties": { + "name": { + "type": "string", + "minLength": 1, + "title": "Name", + "description": "Name of the pip package." + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Version", + "description": "Optional version of the package, following semantic versioning." + } + }, + "type": "object", + "required": ["name"], + "title": "PipRequirement" + }, + "PromptTokensDetails": { + "properties": { + "audio_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Audio Tokens" + }, + "cached_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cached Tokens" + } + }, + "additionalProperties": true, + "type": "object", + "title": "PromptTokensDetails", + "description": "Breakdown of tokens used in the prompt." + }, + "Provider": { + "properties": { + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id", + "description": "The id of the provider, lazily created by the database manager." 
+ }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the provider" + }, + "provider_type": { + "$ref": "#/components/schemas/ProviderType", + "description": "The type of the provider" + }, + "provider_category": { + "$ref": "#/components/schemas/ProviderCategory", + "description": "The category of the provider (base or byok)" + }, + "api_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Key", + "description": "API key or secret key used for requests to the provider.", + "deprecated": true + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Url", + "description": "Base URL for the provider." + }, + "access_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Access Key", + "description": "Access key used for requests to the provider.", + "deprecated": true + }, + "region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Region", + "description": "Region used for requests to the provider." + }, + "api_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Version", + "description": "API version used for requests to the provider." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The last update timestamp of the provider." + }, + "last_synced": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Synced", + "description": "The last time models were synced for this provider." 
+ }, + "api_key_enc": { + "anyOf": [ + { + "type": "string", + "description": "Encrypted secret value (stored as encrypted string)", + "nullable": true + }, + { + "type": "null" + } + ], + "title": "Api Key Enc", + "description": "Encrypted API key as Secret object" + }, + "access_key_enc": { + "anyOf": [ + { + "type": "string", + "description": "Encrypted secret value (stored as encrypted string)", + "nullable": true + }, + { + "type": "null" + } + ], + "title": "Access Key Enc", + "description": "Encrypted access key as Secret object" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["name", "provider_type", "provider_category"], + "title": "Provider" + }, + "ProviderCategory": { + "type": "string", + "enum": ["base", "byok"], + "title": "ProviderCategory" + }, + "ProviderCheck": { + "properties": { + "provider_type": { + "$ref": "#/components/schemas/ProviderType", + "description": "The type of the provider." + }, + "api_key": { + "type": "string", + "title": "Api Key", + "description": "API key or secret key used for requests to the provider." + }, + "access_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Access Key", + "description": "Access key used for requests to the provider." + }, + "region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Region", + "description": "Region used for requests to the provider." + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Url", + "description": "Base URL used for requests to the provider." + }, + "api_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Version", + "description": "API version used for requests to the provider." 
+ } + }, + "type": "object", + "required": ["provider_type", "api_key"], + "title": "ProviderCheck" + }, + "ProviderCreate": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the provider." + }, + "provider_type": { + "$ref": "#/components/schemas/ProviderType", + "description": "The type of the provider." + }, + "api_key": { + "type": "string", + "title": "Api Key", + "description": "API key or secret key used for requests to the provider." + }, + "access_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Access Key", + "description": "Access key used for requests to the provider." + }, + "region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Region", + "description": "Region used for requests to the provider." + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Url", + "description": "Base URL used for requests to the provider." + }, + "api_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Version", + "description": "API version used for requests to the provider." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["name", "provider_type", "api_key"], + "title": "ProviderCreate" + }, + "ProviderTrace": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The timestamp when the object was created." 
+ }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "pattern": "^provider_trace-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Provider_trace", + "examples": ["provider_trace-123e4567-e89b-12d3-a456-426614174000"] + }, + "request_json": { + "additionalProperties": true, + "type": "object", + "title": "Request Json", + "description": "JSON content of the provider request" + }, + "response_json": { + "additionalProperties": true, + "type": "object", + "title": "Response Json", + "description": "JSON content of the provider response" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id", + "description": "ID of the step that this trace is associated with" + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "ID of the agent that generated this trace" + }, + "agent_tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Agent Tags", + "description": "Tags associated with the agent for filtering" + }, + "call_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Call Type", + "description": "Type of call (agent_step, summarization, etc.)" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id", + "description": "ID of the run this trace is associated with" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source", + "description": "Source service that generated this trace (memgpt-server, lettuce-py)" + }, + "org_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + 
"title": "Org Id", + "description": "ID of the organization" + }, + "compaction_settings": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Compaction Settings", + "description": "Compaction/summarization settings (summarization calls only)" + }, + "llm_config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Llm Config", + "description": "LLM configuration used for this call (non-summarization calls only)" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["request_json", "response_json"], + "title": "ProviderTrace", + "description": "Letta's internal representation of a provider trace.\n\nAttributes:\n id (str): The unique identifier of the provider trace.\n request_json (Dict[str, Any]): JSON content of the provider request.\n response_json (Dict[str, Any]): JSON content of the provider response.\n step_id (str): ID of the step that this trace is associated with.\n agent_id (str): ID of the agent that generated this trace.\n agent_tags (list[str]): Tags associated with the agent for filtering.\n call_type (str): Type of call (agent_step, summarization, etc.).\n run_id (str): ID of the run this trace is associated with.\n source (str): Source service that generated this trace (memgpt-server, lettuce-py).\n organization_id (str): The unique identifier of the organization.\n user_id (str): The unique identifier of the user who initiated the request.\n compaction_settings (Dict[str, Any]): Compaction/summarization settings (only for summarization calls).\n llm_config (Dict[str, Any]): LLM configuration used for this call (only for non-summarization calls).\n created_at (datetime): The timestamp when the object was created." 
+ }, + "ProviderType": { + "type": "string", + "enum": [ + "anthropic", + "azure", + "bedrock", + "cerebras", + "chatgpt_oauth", + "deepseek", + "google_ai", + "google_vertex", + "groq", + "hugging-face", + "letta", + "lmstudio_openai", + "minimax", + "mistral", + "ollama", + "openai", + "together", + "vllm", + "sglang", + "openrouter", + "xai", + "zai" + ], + "title": "ProviderType" + }, + "ProviderUpdate": { + "properties": { + "api_key": { + "type": "string", + "title": "Api Key", + "description": "API key or secret key used for requests to the provider." + }, + "access_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Access Key", + "description": "Access key used for requests to the provider." + }, + "region": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Region", + "description": "Region used for requests to the provider." + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Url", + "description": "Base URL used for requests to the provider." + }, + "api_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Version", + "description": "API version used for requests to the provider." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["api_key"], + "title": "ProviderUpdate" + }, + "ReasoningContent": { + "properties": { + "type": { + "type": "string", + "const": "reasoning", + "title": "Type", + "description": "Indicates this is a reasoning/intermediate step.", + "default": "reasoning" + }, + "is_native": { + "type": "boolean", + "title": "Is Native", + "description": "Whether the reasoning content was generated by a reasoner model that processed this step." + }, + "reasoning": { + "type": "string", + "title": "Reasoning", + "description": "The intermediate reasoning or thought process content." 
+ }, + "signature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Signature", + "description": "A unique identifier for this reasoning step." + } + }, + "type": "object", + "required": ["is_native", "reasoning"], + "title": "ReasoningContent", + "description": "Sent via the Anthropic Messages API" + }, + "ReasoningMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "reasoning_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "reasoning_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "source": { + "type": "string", + "enum": ["reasoner_model", "non_reasoner_model"], + "title": "Source", + "default": "non_reasoner_model" + }, + "reasoning": { + "type": "string", + "title": "Reasoning" + }, + "signature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Signature" + } + }, + "type": "object", + "required": ["id", "date", "reasoning"], + "title": "ReasoningMessage", + "description": "Representation of an agent's internal reasoning.\n\nArgs:\n id (str): The ID of the 
message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n source (Literal[\"reasoner_model\", \"non_reasoner_model\"]): Whether the reasoning\n content was generated natively by a reasoner model or derived via prompting\n reasoning (str): The internal reasoning of the agent\n signature (Optional[str]): The model-generated signature of the reasoning step" + }, + "ReasoningMessageListResult": { + "properties": { + "reasoning": { + "type": "string", + "title": "Reasoning" + }, + "message_type": { + "type": "string", + "const": "reasoning_message", + "title": "Message Type", + "default": "reasoning_message" + }, + "message_id": { + "type": "string", + "title": "Message Id", + "description": "The unique identifier of the message." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent that owns the message." + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The unique identifier of the conversation that the message belongs to." + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The time the message was created in ISO format." + } + }, + "type": "object", + "required": ["reasoning", "message_id", "created_at"], + "title": "ReasoningMessageListResult", + "description": "Reasoning message list result with agent context.\n\nShape is identical to UpdateReasoningMessage but includes the owning agent_id and message id." 
+ }, + "RedactedReasoningContent": { + "properties": { + "type": { + "type": "string", + "const": "redacted_reasoning", + "title": "Type", + "description": "Indicates this is a redacted thinking step.", + "default": "redacted_reasoning" + }, + "data": { + "type": "string", + "title": "Data", + "description": "The redacted or filtered intermediate reasoning content." + } + }, + "type": "object", + "required": ["data"], + "title": "RedactedReasoningContent", + "description": "Sent via the Anthropic Messages API" + }, + "RequiredBeforeExitToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "required_before_exit", + "title": "Type", + "default": "required_before_exit" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name"], + "title": "RequiredBeforeExitToolRule", + "description": "Represents a tool rule configuration where this tool must be called before the agent loop can exit." + }, + "RequiresApprovalToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "requires_approval", + "title": "Type", + "default": "requires_approval" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored). Rendering uses fast built-in formatting for performance." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name"], + "title": "RequiresApprovalToolRule", + "description": "Represents a tool rule configuration which requires approval before the tool can be invoked." + }, + "ResetMessagesRequest": { + "properties": { + "add_default_initial_messages": { + "type": "boolean", + "title": "Add Default Initial Messages", + "description": "If true, adds the default initial messages after resetting.", + "default": false + } + }, + "type": "object", + "title": "ResetMessagesRequest", + "description": "Request body for resetting messages on an agent." + }, + "RetrieveStreamRequest": { + "properties": { + "starting_after": { + "type": "integer", + "title": "Starting After", + "description": "Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id", + "default": 0 + }, + "include_pings": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Pings", + "description": "Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.", + "default": true + }, + "poll_interval": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Poll Interval", + "description": "Seconds to wait between polls when no new data.", + "default": 0.1 + }, + "batch_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Batch Size", + "description": "Number of entries to read per batch.", + "default": 100 + } + }, + "type": "object", + "title": "RetrieveStreamRequest" + }, + "RoundRobinManager": { + "properties": { + "manager_type": { + "type": "string", + "const": "round_robin", + "title": "Manager Type", + "description": "", + "default": "round_robin" + }, + "max_turns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Turns", + "description": "" + } + }, + "type": "object", + "title": 
"RoundRobinManager" + }, + "RoundRobinManagerUpdate": { + "properties": { + "manager_type": { + "type": "string", + "const": "round_robin", + "title": "Manager Type", + "description": "", + "default": "round_robin" + }, + "max_turns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Turns", + "description": "" + } + }, + "type": "object", + "title": "RoundRobinManagerUpdate" + }, + "Run": { + "properties": { + "id": { + "type": "string", + "pattern": "^(job|run)-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Run", + "examples": ["run-123e4567-e89b-12d3-a456-426614174000"] + }, + "status": { + "$ref": "#/components/schemas/RunStatus", + "description": "The current status of the run.", + "default": "created" + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The timestamp when the run was created." + }, + "completed_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Completed At", + "description": "The timestamp when the run was completed." + }, + "agent_id": { + "type": "string", + "title": "Agent Id", + "description": "The unique identifier of the agent associated with the run." + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The unique identifier of the conversation associated with the run." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template ID that the run belongs to." + }, + "background": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Background", + "description": "Whether the run was created in background mode." 
+ }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Additional metadata for the run." + }, + "request_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LettaRequestConfig" + }, + { + "type": "null" + } + ], + "description": "The request configuration for the run." + }, + "stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The reason why the run was stopped." + }, + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Url", + "description": "If set, POST to this URL when the run completes." + }, + "callback_sent_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Callback Sent At", + "description": "Timestamp when the callback was last attempted." + }, + "callback_status_code": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Callback Status Code", + "description": "HTTP status code returned by the callback endpoint." + }, + "callback_error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Callback Error", + "description": "Optional error message from attempting to POST the callback endpoint." + }, + "ttft_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Ttft Ns", + "description": "Time to first token for a run in nanoseconds" + }, + "total_duration_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Duration Ns", + "description": "Total run duration in nanoseconds" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["agent_id"], + "title": "Run", + "description": "Representation of a run - a conversation or processing session for an agent. 
Runs track when agents process messages and maintain the relationship between agents, steps, and messages." + }, + "RunMetrics": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the run this metric belongs to (matches runs.id)." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project that the run belongs to (cloud only)." + }, + "run_start_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Run Start Ns", + "description": "The timestamp of the start of the run in nanoseconds." + }, + "run_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Run Ns", + "description": "Total time for the run in nanoseconds." + }, + "num_steps": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Num Steps", + "description": "The number of steps in the run." + }, + "tools_used": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools Used", + "description": "List of tool IDs that were used in this run." + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The template ID that the run belongs to (cloud only)." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template ID that the run belongs to (cloud only)." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["id"], + "title": "RunMetrics" + }, + "RunStatus": { + "type": "string", + "enum": ["created", "running", "completed", "failed", "cancelled"], + "title": "RunStatus", + "description": "Status of the run." + }, + "SSEMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "sse", + "title": "Mcp Server Type", + "default": "sse" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + }, + "id": { + "type": "string", + "pattern": "^mcp_server-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Mcp_server", + "examples": ["mcp_server-123e4567-e89b-12d3-a456-426614174000"] + }, + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the MCP server" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url", "server_name"], + "title": "SSEMCPServer", + "description": "An SSE MCP server" + }, + "SSEServerConfig": { + "properties": { + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the server" + }, + "type": { + "$ref": "#/components/schemas/MCPServerType", + "default": "sse" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": 
"The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "type": "object", + "required": ["server_name", "server_url"], + "title": "SSEServerConfig", + "description": "Configuration for an MCP server using SSE" + }, + "SandboxConfig": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "pattern": "^sandbox-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Sandbox", + "examples": ["sandbox-123e4567-e89b-12d3-a456-426614174000"] + }, + "type": { + "$ref": "#/components/schemas/SandboxType", + "description": "The type of sandbox." 
+ }, + "config": { + "additionalProperties": true, + "type": "object", + "title": "Config", + "description": "The JSON sandbox settings data." + } + }, + "additionalProperties": false, + "type": "object", + "title": "SandboxConfig" + }, + "SandboxConfigCreate": { + "properties": { + "config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LocalSandboxConfig" + }, + { + "$ref": "#/components/schemas/E2BSandboxConfig" + }, + { + "$ref": "#/components/schemas/ModalSandboxConfig" + } + ], + "title": "Config", + "description": "The configuration for the sandbox." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["config"], + "title": "SandboxConfigCreate" + }, + "SandboxConfigUpdate": { + "properties": { + "config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LocalSandboxConfig" + }, + { + "$ref": "#/components/schemas/E2BSandboxConfig" + }, + { + "$ref": "#/components/schemas/ModalSandboxConfig" + } + ], + "title": "Config", + "description": "The JSON configuration data for the sandbox." + } + }, + "additionalProperties": false, + "type": "object", + "title": "SandboxConfigUpdate", + "description": "Pydantic model for updating SandboxConfig fields." + }, + "SandboxEnvironmentVariable": { + "properties": { + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this object." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this object." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the object was created." 
+ }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the object was last updated." + }, + "id": { + "type": "string", + "pattern": "^sandbox-env-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Sandbox-env", + "examples": ["sandbox-env-123e4567-e89b-12d3-a456-426614174000"] + }, + "key": { + "type": "string", + "title": "Key", + "description": "The name of the environment variable." + }, + "value": { + "type": "string", + "title": "Value", + "description": "The value of the environment variable." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "An optional description of the environment variable." + }, + "value_enc": { + "anyOf": [ + { + "type": "string", + "description": "Encrypted secret value (stored as encrypted string)", + "nullable": true + }, + { + "type": "null" + } + ], + "title": "Value Enc", + "description": "Encrypted value as Secret object" + }, + "sandbox_config_id": { + "type": "string", + "title": "Sandbox Config Id", + "description": "The ID of the sandbox config this environment variable belongs to." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["key", "value", "sandbox_config_id"], + "title": "SandboxEnvironmentVariable" + }, + "SandboxEnvironmentVariableCreate": { + "properties": { + "key": { + "type": "string", + "title": "Key", + "description": "The name of the environment variable." + }, + "value": { + "type": "string", + "title": "Value", + "description": "The value of the environment variable." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "An optional description of the environment variable." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["key", "value"], + "title": "SandboxEnvironmentVariableCreate" + }, + "SandboxEnvironmentVariableUpdate": { + "properties": { + "key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Key", + "description": "The name of the environment variable." + }, + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Value", + "description": "The value of the environment variable." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "An optional description of the environment variable." + } + }, + "additionalProperties": false, + "type": "object", + "title": "SandboxEnvironmentVariableUpdate" + }, + "SandboxType": { + "type": "string", + "enum": ["e2b", "modal", "local"], + "title": "SandboxType" + }, + "SearchAllMessagesRequest": { + "properties": { + "query": { + "type": "string", + "title": "Query", + "description": "Text query for full-text search" + }, + "search_mode": { + "type": "string", + "enum": ["vector", "fts", "hybrid"], + "title": "Search Mode", + "description": "Search mode to use", + "default": "hybrid" + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "Filter messages by agent ID" + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "Filter messages by conversation ID" + }, + "limit": { + "type": "integer", + "maximum": 100, + "minimum": 1, + "title": "Limit", + "description": "Maximum number of results to return", + "default": 50 + }, + "start_date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Start Date", + "description": "Filter messages created after this date" + }, + "end_date": { + 
"anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "End Date", + "description": "Filter messages created on or before this date" + } + }, + "type": "object", + "required": ["query"], + "title": "SearchAllMessagesRequest" + }, + "SleeptimeManager": { + "properties": { + "manager_type": { + "type": "string", + "const": "sleeptime", + "title": "Manager Type", + "description": "", + "default": "sleeptime" + }, + "manager_agent_id": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "title": "Manager Agent Id", + "description": "", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "sleeptime_agent_frequency": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sleeptime Agent Frequency", + "description": "" + } + }, + "type": "object", + "required": ["manager_agent_id"], + "title": "SleeptimeManager" + }, + "SleeptimeManagerUpdate": { + "properties": { + "manager_type": { + "type": "string", + "const": "sleeptime", + "title": "Manager Type", + "description": "", + "default": "sleeptime" + }, + "manager_agent_id": { + "anyOf": [ + { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + { + "type": "null" + } + ], + "title": "Manager Agent Id", + "description": "" + }, + "sleeptime_agent_frequency": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sleeptime Agent Frequency", + "description": "" + } + }, + "type": "object", + "title": "SleeptimeManagerUpdate" + }, + "Source": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the source." 
+ }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the source." + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions", + "description": "Instructions for how to use the source." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata associated with the source." + }, + "id": { + "type": "string", + "pattern": "^source-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Source", + "examples": ["source-123e4567-e89b-12d3-a456-426614174000"] + }, + "embedding_config": { + "$ref": "#/components/schemas/EmbeddingConfig", + "description": "The embedding configuration used by the source." + }, + "vector_db_provider": { + "$ref": "#/components/schemas/VectorDBProvider", + "description": "The vector database provider used for this source's passages", + "default": "native" + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Tool." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this Tool." + }, + "created_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "The timestamp when the source was created." + }, + "updated_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Updated At", + "description": "The timestamp when the source was last updated." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["name", "embedding_config"], + "title": "Source", + "description": "(Deprecated: Use Folder) Representation of a source, which is a collection of files and passages." + }, + "SourceCreate": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the source." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the source." + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions", + "description": "Instructions for how to use the source." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata associated with the source." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The handle for the embedding config used by the source." + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "The chunk size of the embedding." + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "(Legacy) The embedding configuration used by the source." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["name"], + "title": "SourceCreate", + "description": "Schema for creating a new Source." + }, + "SourceSchema": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the source." 
+ }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the source." + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions", + "description": "Instructions for how to use the source." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata associated with the source." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The handle for the embedding config used by the source." + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "The chunk size of the embedding." + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "(Legacy) The embedding configuration used by the source." + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this source in the file" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["name", "id"], + "title": "SourceSchema", + "description": "Source with human-readable ID for agent file" + }, + "SourceStats": { + "properties": { + "source_id": { + "type": "string", + "title": "Source Id", + "description": "Deprecated: Use `folder_id` field instead. Unique identifier of the source", + "deprecated": true + }, + "source_name": { + "type": "string", + "title": "Source Name", + "description": "Deprecated: Use `folder_name` field instead. 
Name of the source", + "deprecated": true + }, + "file_count": { + "type": "integer", + "title": "File Count", + "description": "Number of files in the source", + "default": 0 + }, + "total_size": { + "type": "integer", + "title": "Total Size", + "description": "Total size of all files in bytes", + "default": 0 + }, + "files": { + "items": { + "$ref": "#/components/schemas/FileStats" + }, + "type": "array", + "title": "Files", + "description": "List of file statistics" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["source_id", "source_name"], + "title": "SourceStats", + "description": "Aggregated metadata for a source" + }, + "SourceUpdate": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the source." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the source." + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions", + "description": "Instructions for how to use the source." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "Metadata associated with the source." + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "The embedding configuration used by the source." + } + }, + "additionalProperties": false, + "type": "object", + "title": "SourceUpdate", + "description": "Schema for updating an existing Source." 
+ }, + "StdioMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "stdio", + "title": "Mcp Server Type", + "default": "stdio" + }, + "command": { + "type": "string", + "title": "Command", + "description": "The command to run (MCP 'local' client will run this command)" + }, + "args": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Args", + "description": "The arguments to pass to the command" + }, + "env": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Env", + "description": "Environment variables to set" + }, + "id": { + "type": "string", + "pattern": "^mcp_server-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Mcp_server", + "examples": ["mcp_server-123e4567-e89b-12d3-a456-426614174000"] + }, + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the MCP server" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["command", "args", "server_name"], + "title": "StdioMCPServer", + "description": "A Stdio MCP server" + }, + "StdioServerConfig": { + "properties": { + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the server" + }, + "type": { + "$ref": "#/components/schemas/MCPServerType", + "default": "stdio" + }, + "command": { + "type": "string", + "title": "Command", + "description": "The command to run (MCP 'local' client will run this command)" + }, + "args": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Args", + "description": "The arguments to pass to the command" + }, + "env": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Env", + "description": "Environment variables to set" + } + }, + "type": "object", + "required": ["server_name", "command", "args"], + "title": 
"StdioServerConfig" + }, + "Step": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the step. Assigned by the database." + }, + "origin": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Origin", + "description": "The surface that this agent step was initiated from." + }, + "provider_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Id", + "description": "The unique identifier of the provider that was configured for this step" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id", + "description": "The unique identifier of the run that this step belongs to. Only included for async calls." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The ID of the agent that performed the step." + }, + "provider_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Name", + "description": "The name of the provider used for this step." + }, + "provider_category": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Category", + "description": "The category of the provider used for this step." + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The name of the model used for this step." + }, + "model_endpoint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Endpoint", + "description": "The model endpoint url used for this step." + }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit configured for this step." 
+ }, + "completion_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Completion Tokens", + "description": "The number of tokens generated by the agent during this step." + }, + "prompt_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Prompt Tokens", + "description": "The number of tokens in the prompt during this step." + }, + "total_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Tokens", + "description": "The total number of tokens processed by the agent during this step." + }, + "completion_tokens_details": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Completion Tokens Details", + "description": "Detailed completion token breakdown (e.g., reasoning_tokens)." + }, + "prompt_tokens_details": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Prompt Tokens Details", + "description": "Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)." + }, + "stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The stop reason associated with the step." + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "description": "Metadata tags.", + "default": [] + }, + "tid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tid", + "description": "The unique identifier of the transaction that processed this step." + }, + "trace_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Trace Id", + "description": "The trace id of the agent step." 
+ }, + "request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Request Id", + "description": "The API request log ID from cloud-api for correlating steps with API requests." + }, + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "The messages generated during this step. Deprecated: use `GET /v1/steps/{step_id}/messages` endpoint instead", + "default": [], + "deprecated": true + }, + "feedback": { + "anyOf": [ + { + "type": "string", + "enum": ["positive", "negative"] + }, + { + "type": "null" + } + ], + "title": "Feedback", + "description": "The feedback for this step. Must be either 'positive' or 'negative'." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project that the agent that executed this step belongs to (cloud only)." + }, + "error_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error Type", + "description": "The type/class of the error that occurred" + }, + "error_data": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Error Data", + "description": "Error details including message, traceback, and additional context" + }, + "status": { + "anyOf": [ + { + "$ref": "#/components/schemas/StepStatus" + }, + { + "type": "null" + } + ], + "description": "Step status: pending, success, or failed", + "default": "pending" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["id"], + "title": "Step" + }, + "StepMetrics": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the step this metric belongs to (matches steps.id)." 
+ }, + "provider_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider Id", + "description": "The unique identifier of the provider." + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id", + "description": "The unique identifier of the run." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent." + }, + "step_start_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Step Start Ns", + "description": "The timestamp of the start of the step in nanoseconds." + }, + "llm_request_start_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Llm Request Start Ns", + "description": "The timestamp of the start of the llm request in nanoseconds." + }, + "llm_request_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Llm Request Ns", + "description": "Time spent on LLM requests in nanoseconds." + }, + "tool_execution_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Tool Execution Ns", + "description": "Time spent on tool execution in nanoseconds." + }, + "step_ns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Step Ns", + "description": "Total time for the step in nanoseconds." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template ID that the step belongs to (cloud only)." + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The template ID that the step belongs to (cloud only)." 
+ }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project that the step belongs to (cloud only)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["id"], + "title": "StepMetrics" + }, + "StepStatus": { + "type": "string", + "enum": ["pending", "success", "failed", "cancelled"], + "title": "StepStatus", + "description": "Status of a step execution" + }, + "StopReasonType": { + "type": "string", + "enum": [ + "end_turn", + "error", + "llm_api_error", + "invalid_llm_response", + "invalid_tool_call", + "max_steps", + "max_tokens_exceeded", + "no_tool_call", + "tool_rule", + "cancelled", + "requires_approval", + "context_window_overflow_in_system_prompt" + ], + "title": "StopReasonType" + }, + "StreamableHTTPMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "streamable_http", + "title": "Mcp Server Type", + "default": "streamable_http" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + }, + "id": { + "type": "string", + "pattern": "^mcp_server-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Mcp_server", + "examples": ["mcp_server-123e4567-e89b-12d3-a456-426614174000"] + }, + "server_name": { + "type": "string", + 
"title": "Server Name", + "description": "The name of the MCP server" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url", "server_name"], + "title": "StreamableHTTPMCPServer", + "description": "A Streamable HTTP MCP server" + }, + "StreamableHTTPServerConfig": { + "properties": { + "server_name": { + "type": "string", + "title": "Server Name", + "description": "The name of the server" + }, + "type": { + "$ref": "#/components/schemas/MCPServerType", + "default": "streamable_http" + }, + "server_url": { + "type": "string", + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "type": "object", + "required": ["server_name", "server_url"], + "title": "StreamableHTTPServerConfig", + "description": "Configuration for an MCP server using Streamable HTTP" + }, + "SummarizedReasoningContent": { + "properties": { + "type": { + "type": "string", + "const": "summarized_reasoning", + "title": "Type", + "description": "Indicates this is a summarized reasoning step.", + "default": "summarized_reasoning" + }, + "id": { + "type": "string", + "title": "Id", + "description": "The unique identifier for this reasoning step." 
+ }, + "summary": { + "items": { + "$ref": "#/components/schemas/SummarizedReasoningContentPart" + }, + "type": "array", + "title": "Summary", + "description": "Summaries of the reasoning content." + }, + "encrypted_content": { + "type": "string", + "title": "Encrypted Content", + "description": "The encrypted reasoning content." + } + }, + "type": "object", + "required": ["id", "summary"], + "title": "SummarizedReasoningContent", + "description": "The style of reasoning content returned by the OpenAI Responses API" + }, + "SummarizedReasoningContentPart": { + "properties": { + "index": { + "type": "integer", + "title": "Index", + "description": "The index of the summary part." + }, + "text": { + "type": "string", + "title": "Text", + "description": "The text of the summary part." + } + }, + "type": "object", + "required": ["index", "text"], + "title": "SummarizedReasoningContentPart" + }, + "SummaryMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "summary", + "title": "Message Type", + "default": "summary_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "summary": { + "type": 
"string", + "title": "Summary" + } + }, + "type": "object", + "required": ["id", "date", "summary"], + "title": "SummaryMessage", + "description": "A message representing a summary of the conversation. Sent to the LLM as a user or system message depending on the provider." + }, + "SupervisorManager": { + "properties": { + "manager_type": { + "type": "string", + "const": "supervisor", + "title": "Manager Type", + "description": "", + "default": "supervisor" + }, + "manager_agent_id": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "title": "Manager Agent Id", + "description": "", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + } + }, + "type": "object", + "required": ["manager_agent_id"], + "title": "SupervisorManager" + }, + "SupervisorManagerUpdate": { + "properties": { + "manager_type": { + "type": "string", + "const": "supervisor", + "title": "Manager Type", + "description": "", + "default": "supervisor" + }, + "manager_agent_id": { + "anyOf": [ + { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + { + "type": "null" + } + ], + "title": "Manager Agent Id", + "description": "" + } + }, + "type": "object", + "required": ["manager_agent_id"], + "title": "SupervisorManagerUpdate" + }, + "SystemMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "system_message", + "title": "Message Type", + "description": "The type of the message.", + "default": 
"system_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "content": { + "type": "string", + "title": "Content", + "description": "The message content sent by the system" + } + }, + "type": "object", + "required": ["id", "date", "content"], + "title": "SystemMessage", + "description": "A message generated by the system. Never streamed back on a response, only used for cursor pagination.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n content (str): The message content sent by the system" + }, + "SystemMessageListResult": { + "properties": { + "message_type": { + "type": "string", + "const": "system_message", + "title": "Message Type", + "default": "system_message" + }, + "content": { + "type": "string", + "title": "Content", + "description": "The message content sent by the system (can be a string or an array of multi-modal content parts)" + }, + "message_id": { + "type": "string", + "title": "Message Id", + "description": "The unique identifier of the message." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent that owns the message." 
+ }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The unique identifier of the conversation that the message belongs to." + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The time the message was created in ISO format." + } + }, + "type": "object", + "required": ["content", "message_id", "created_at"], + "title": "SystemMessageListResult", + "description": "System message list result with agent context.\n\nShape is identical to UpdateSystemMessage but includes the owning agent_id and message id." + }, + "TagSchema": { + "properties": { + "tag": { + "type": "string", + "title": "Tag" + } + }, + "type": "object", + "required": ["tag"], + "title": "TagSchema" + }, + "TerminalToolRule": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "The name of the tool. Must exist in the database for the user's organization." + }, + "type": { + "type": "string", + "const": "exit_loop", + "title": "Type", + "default": "exit_loop" + }, + "prompt_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt Template", + "description": "Optional template string (ignored)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool_name"], + "title": "TerminalToolRule", + "description": "Represents a terminal tool rule configuration where if this tool gets called, it must end the agent loop." + }, + "TextContent": { + "properties": { + "type": { + "type": "string", + "const": "text", + "title": "Type", + "description": "The type of the message.", + "default": "text" + }, + "text": { + "type": "string", + "title": "Text", + "description": "The text content of the message." 
+ }, + "signature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Signature", + "description": "Stores a unique identifier for any reasoning associated with this text content." + } + }, + "type": "object", + "required": ["text"], + "title": "TextContent" + }, + "TextResponseFormat": { + "properties": { + "type": { + "type": "string", + "const": "text", + "title": "Type", + "description": "The type of the response format.", + "default": "text" + } + }, + "type": "object", + "title": "TextResponseFormat", + "description": "Response format for plain text responses." + }, + "TogetherModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "together", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "together" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." 
+ } + }, + "type": "object", + "title": "TogetherModelSettings", + "description": "Together AI model configuration (OpenAI-compatible)." + }, + "Tool": { + "properties": { + "id": { + "type": "string", + "pattern": "^tool-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the Tool", + "examples": ["tool-123e4567-e89b-12d3-a456-426614174000"] + }, + "tool_type": { + "$ref": "#/components/schemas/ToolType", + "description": "The type of the tool.", + "default": "custom" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the tool." + }, + "source_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Type", + "description": "The type of the source code." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the function." + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "description": "Metadata tags.", + "default": [] + }, + "source_code": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Code", + "description": "The source code of the function." + }, + "json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Json Schema", + "description": "The JSON schema of the function." + }, + "args_json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args Json Schema", + "description": "The args JSON schema of the function." 
+ }, + "return_char_limit": { + "type": "integer", + "maximum": 1000000, + "minimum": 1, + "title": "Return Char Limit", + "description": "The maximum number of characters in the response.", + "default": 50000 + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "Optional list of pip packages required by this tool." + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/NpmRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "Optional list of npm packages required by this tool." + }, + "default_requires_approval": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Default Requires Approval", + "description": "Default value for whether or not executing this tool requires approval." + }, + "enable_parallel_execution": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Parallel Execution", + "description": "If set to True, then this tool will potentially be executed concurrently with other tools. Default False.", + "default": false + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Tool." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that last updated this Tool." + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "A dictionary of additional metadata for the tool."
+ }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project id of the tool." + } + }, + "additionalProperties": false, + "type": "object", + "title": "Tool", + "description": "Representation of a tool, which is a function that can be called by the agent." + }, + "ToolAnnotations": { + "properties": { + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + }, + "readOnlyHint": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Readonlyhint" + }, + "destructiveHint": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Destructivehint" + }, + "idempotentHint": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Idempotenthint" + }, + "openWorldHint": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Openworldhint" + } + }, + "additionalProperties": true, + "type": "object", + "title": "ToolAnnotations", + "description": "Additional properties describing a Tool to clients.\n\nNOTE: all properties in ToolAnnotations are **hints**.\nThey are not guaranteed to provide a faithful description of\ntool behavior (including descriptive properties like `title`).\n\nClients should never make tool use decisions based on ToolAnnotations\nreceived from untrusted servers." 
+ }, + "ToolCall": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id" + } + }, + "type": "object", + "required": ["name", "arguments", "tool_call_id"], + "title": "ToolCall" + }, + "ToolCallContent": { + "properties": { + "type": { + "type": "string", + "const": "tool_call", + "title": "Type", + "description": "Indicates this content represents a tool call event.", + "default": "tool_call" + }, + "id": { + "type": "string", + "title": "Id", + "description": "A unique identifier for this specific tool call instance." + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the tool being called." + }, + "input": { + "additionalProperties": true, + "type": "object", + "title": "Input", + "description": "The parameters being passed to the tool, structured as a dictionary of parameter names to values." + }, + "signature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Signature", + "description": "Stores a unique identifier for any reasoning associated with this tool call." 
+ } + }, + "type": "object", + "required": ["id", "name", "input"], + "title": "ToolCallContent" + }, + "ToolCallDelta": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "arguments": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Arguments" + }, + "tool_call_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tool Call Id" + } + }, + "type": "object", + "title": "ToolCallDelta" + }, + "ToolCallMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "tool_call_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "tool_call_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "tool_call": { + "anyOf": [ + { + "$ref": "#/components/schemas/ToolCall" + }, + { + "$ref": "#/components/schemas/ToolCallDelta" + } + ], + "title": "Tool Call", + "deprecated": true + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ToolCall" + }, + "type": "array" + }, + { + "$ref": 
"#/components/schemas/ToolCallDelta" + }, + { + "type": "null" + } + ], + "title": "Tool Calls" + } + }, + "type": "object", + "required": ["id", "date", "tool_call"], + "title": "ToolCallMessage", + "description": "A message representing a request to call a tool (generated by the LLM to trigger tool execution).\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n tool_call (Union[ToolCall, ToolCallDelta]): The tool call" + }, + "ToolCallNode": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the child tool to invoke next." + }, + "args": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args", + "description": "Optional prefilled arguments for this child tool. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model." + } + }, + "type": "object", + "required": ["name"], + "title": "ToolCallNode", + "description": "Typed child override for prefilled arguments.\n\nWhen used in a ChildToolRule, if this child is selected next, its `args` will be\napplied as prefilled arguments (overriding overlapping LLM-provided values)." + }, + "ToolCreate": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the tool." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Metadata tags." + }, + "source_code": { + "type": "string", + "title": "Source Code", + "description": "The source code of the function." 
+ }, + "source_type": { + "type": "string", + "title": "Source Type", + "description": "The source type of the function.", + "default": "python" + }, + "json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Json Schema", + "description": "The JSON schema of the function (auto-generated from source_code if not provided)" + }, + "args_json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args Json Schema", + "description": "The args JSON schema of the function." + }, + "return_char_limit": { + "type": "integer", + "maximum": 1000000, + "minimum": 1, + "title": "Return Char Limit", + "description": "The maximum number of characters in the response.", + "default": 50000 + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "Optional list of pip packages required by this tool." + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/NpmRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "Optional list of npm packages required by this tool." + }, + "default_requires_approval": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Default Requires Approval", + "description": "Whether or not to require approval before executing this tool." + }, + "enable_parallel_execution": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Parallel Execution", + "description": "If set to True, then this tool will potentially be executed concurrently with other tools. 
Default False.", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "required": ["source_code"], + "title": "ToolCreate" + }, + "ToolEnvVarSchema": { + "properties": { + "created_at": { + "type": "string", + "title": "Created At" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "key": { + "type": "string", + "title": "Key" + }, + "updated_at": { + "type": "string", + "title": "Updated At" + }, + "value": { + "type": "string", + "title": "Value" + } + }, + "type": "object", + "required": ["created_at", "description", "key", "updated_at", "value"], + "title": "ToolEnvVarSchema" + }, + "ToolExecutionResult": { + "properties": { + "status": { + "type": "string", + "enum": ["success", "error"], + "title": "Status", + "description": "The status of the tool execution and return object" + }, + "func_return": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Func Return", + "description": "The function return object" + }, + "agent_state": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentState" + }, + { + "type": "null" + } + ], + "description": "The agent state", + "deprecated": true + }, + "stdout": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stdout", + "description": "Captured stdout (prints, logs) from function invocation" + }, + "stderr": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stderr", + "description": "Captured stderr from the function invocation" + }, + "sandbox_config_fingerprint": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sandbox Config Fingerprint", + "description": "The fingerprint of the config for the sandbox" + } + }, + "type": "object", + "required": ["status"], + "title": "ToolExecutionResult" + }, + "ToolJSONSchema": { + 
"properties": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { + "type": "string", + "title": "Description" + }, + "parameters": { + "$ref": "#/components/schemas/ParametersSchema" + }, + "type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Type" + }, + "required": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Required" + } + }, + "type": "object", + "required": ["name", "description", "parameters"], + "title": "ToolJSONSchema" + }, + "ToolReturnContent": { + "properties": { + "type": { + "type": "string", + "const": "tool_return", + "title": "Type", + "description": "Indicates this content represents a tool return event.", + "default": "tool_return" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id", + "description": "References the ID of the ToolCallContent that initiated this tool call." + }, + "content": { + "type": "string", + "title": "Content", + "description": "The content returned by the tool execution." + }, + "is_error": { + "type": "boolean", + "title": "Is Error", + "description": "Indicates whether the tool execution resulted in an error." 
+ } + }, + "type": "object", + "required": ["tool_call_id", "content", "is_error"], + "title": "ToolReturnContent" + }, + "ToolReturnMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "date": { + "type": "string", + "format": "date-time", + "title": "Date" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "message_type": { + "type": "string", + "const": "tool_return_message", + "title": "Message Type", + "description": "The type of the message.", + "default": "tool_return_message" + }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id" + }, + "step_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Step Id" + }, + "is_err": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Is Err" + }, + "seq_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Seq Id" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "tool_return": { + "type": "string", + "title": "Tool Return", + "deprecated": true + }, + "status": { + "type": "string", + "enum": ["success", "error"], + "title": "Status", + "deprecated": true + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id", + "deprecated": true + }, + "stdout": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stdout", + "deprecated": true + }, + "stderr": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stderr", + "deprecated": true + }, + "tool_returns": { + "anyOf": [ + { + "items": { + "$ref": 
"#/components/schemas/letta__schemas__letta_message__ToolReturn" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Returns" + } + }, + "type": "object", + "required": ["id", "date", "tool_return", "status", "tool_call_id"], + "title": "ToolReturnMessage", + "description": "A message representing the return value of a tool call (generated by Letta executing the requested tool).\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n tool_return (str): The return value of the tool (deprecated, use tool_returns)\n status (Literal[\"success\", \"error\"]): The status of the tool call (deprecated, use tool_returns)\n tool_call_id (str): A unique identifier for the tool call that generated this message (deprecated, use tool_returns)\n stdout (Optional[List(str)]): Captured stdout (e.g. prints, logs) from the tool invocation (deprecated, use tool_returns)\n stderr (Optional[List(str)]): Captured stderr from the tool invocation (deprecated, use tool_returns)\n tool_returns (Optional[List[ToolReturn]]): List of tool returns for multi-tool support" + }, + "ToolRunFromSource": { + "properties": { + "source_code": { + "type": "string", + "title": "Source Code", + "description": "The source code of the function." + }, + "args": { + "additionalProperties": true, + "type": "object", + "title": "Args", + "description": "The arguments to pass to the tool." + }, + "env_vars": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Env Vars", + "description": "The environment variables to pass to the tool." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the tool to run." 
+ }, + "source_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Type", + "description": "The type of the source code." + }, + "args_json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args Json Schema", + "description": "The args JSON schema of the function." + }, + "json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Json Schema", + "description": "The JSON schema of the function (auto-generated from source_code if not provided)" + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "Optional list of pip packages required by this tool." + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/NpmRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "Optional list of npm packages required by this tool." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["source_code", "args"], + "title": "ToolRunFromSource" + }, + "ToolSearchRequest": { + "properties": { + "query": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Query", + "description": "Text query for semantic search." + }, + "search_mode": { + "type": "string", + "enum": ["vector", "fts", "hybrid"], + "title": "Search Mode", + "description": "Search mode: vector, fts, or hybrid.", + "default": "hybrid" + }, + "tool_types": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Types", + "description": "Filter by tool types (e.g., 'custom', 'letta_core')." 
+ }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Filter by tags (match any)." + }, + "limit": { + "type": "integer", + "maximum": 100, + "minimum": 1, + "title": "Limit", + "description": "Maximum number of results to return.", + "default": 50 + } + }, + "additionalProperties": false, + "type": "object", + "title": "ToolSearchRequest", + "description": "Request model for searching tools using semantic search." + }, + "ToolSearchResult": { + "properties": { + "tool": { + "$ref": "#/components/schemas/Tool", + "description": "The matched tool." + }, + "embedded_text": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedded Text", + "description": "The embedded text content used for matching." + }, + "fts_rank": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Fts Rank", + "description": "Full-text search rank position." + }, + "vector_rank": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Vector Rank", + "description": "Vector search rank position." + }, + "combined_score": { + "type": "number", + "title": "Combined Score", + "description": "Combined relevance score (RRF for hybrid mode)." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["tool", "combined_score"], + "title": "ToolSearchResult", + "description": "Result from a tool search operation." 
+ }, + "ToolType": { + "type": "string", + "enum": [ + "custom", + "letta_core", + "letta_memory_core", + "letta_multi_agent_core", + "letta_sleeptime_core", + "letta_voice_sleeptime_core", + "letta_builtin", + "letta_files_core", + "external_langchain", + "external_composio", + "external_mcp" + ], + "title": "ToolType" + }, + "ToolUpdate": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the tool." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "Metadata tags." + }, + "source_code": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Code", + "description": "The source code of the function." + }, + "source_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Type", + "description": "The type of the source code." + }, + "json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Json Schema", + "description": "The JSON schema of the function (auto-generated from source_code if not provided)" + }, + "args_json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args Json Schema", + "description": "The args JSON schema of the function." + }, + "return_char_limit": { + "anyOf": [ + { + "type": "integer", + "maximum": 1000000, + "minimum": 1 + }, + { + "type": "null" + } + ], + "title": "Return Char Limit", + "description": "The maximum number of characters in the response." 
+ }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "Optional list of pip packages required by this tool." + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/NpmRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "Optional list of npm packages required by this tool." + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "A dictionary of additional metadata for the tool." + }, + "default_requires_approval": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Default Requires Approval", + "description": "Whether or not to require approval before executing this tool." + }, + "enable_parallel_execution": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Parallel Execution", + "description": "If set to True, then this tool will potentially be executed concurrently with other tools. Default False.", + "default": false + } + }, + "type": "object", + "title": "ToolUpdate" + }, + "TopLogprob": { + "properties": { + "token": { + "type": "string", + "title": "Token" + }, + "bytes": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Bytes" + }, + "logprob": { + "type": "number", + "title": "Logprob" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["token", "logprob"], + "title": "TopLogprob" + }, + "UpdateAgent": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the agent." 
+ }, + "tool_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Ids", + "description": "The ids of the tools used by the agent." + }, + "source_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Source Ids", + "description": "Deprecated: Use `folder_ids` field instead. The ids of the sources used by the agent.", + "deprecated": true + }, + "folder_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Folder Ids", + "description": "The ids of the folders used by the agent." + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The ids of the blocks used by the agent." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the agent." + }, + "system": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System", + "description": "The system prompt used by the agent." 
+ }, + "tool_rules": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ChildToolRule" + }, + { + "$ref": "#/components/schemas/InitToolRule" + }, + { + "$ref": "#/components/schemas/TerminalToolRule" + }, + { + "$ref": "#/components/schemas/ConditionalToolRule" + }, + { + "$ref": "#/components/schemas/ContinueToolRule" + }, + { + "$ref": "#/components/schemas/RequiredBeforeExitToolRule" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRule" + }, + { + "$ref": "#/components/schemas/ParentToolRule" + }, + { + "$ref": "#/components/schemas/RequiresApprovalToolRule" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "conditional": "#/components/schemas/ConditionalToolRule", + "constrain_child_tools": "#/components/schemas/ChildToolRule", + "continue_loop": "#/components/schemas/ContinueToolRule", + "exit_loop": "#/components/schemas/TerminalToolRule", + "max_count_per_step": "#/components/schemas/MaxCountPerStepToolRule", + "parent_last_tool": "#/components/schemas/ParentToolRule", + "required_before_exit": "#/components/schemas/RequiredBeforeExitToolRule", + "requires_approval": "#/components/schemas/RequiresApprovalToolRule", + "run_first": "#/components/schemas/InitToolRule" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Rules", + "description": "The tool rules governing the agent." + }, + "message_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Message Ids", + "description": "The ids of the messages in the agent's in-context memory." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the agent." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the agent." 
+ }, + "tool_exec_environment_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Tool Exec Environment Variables", + "description": "Deprecated: use `secrets` field instead" + }, + "secrets": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Secrets", + "description": "The environment variables for tool execution specific to this agent." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The id of the project the agent belongs to." + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "The id of the template the agent belongs to." + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "The base template id of the agent." + }, + "identity_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Identity Ids", + "description": "The ids of the identities associated with this agent." + }, + "message_buffer_autoclear": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Message Buffer Autoclear", + "description": "If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case." + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." 
+ }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model 
Settings", + "description": "The model settings for the agent." + }, + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "The compaction settings configuration used for compaction." + }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit used by the agent." + }, + "reasoning": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Reasoning", + "description": "Deprecated: Use `model` field to configure reasoning instead. Whether to enable reasoning for this agent.", + "deprecated": true + }, + "llm_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LLMConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `model` field instead. The LLM configuration used by the agent.", + "deprecated": true + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "The embedding configuration used by the agent." + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. 
If set to True, enables parallel tool calling.", + "deprecated": true + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.", + "deprecated": true + }, + "enable_sleeptime": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Sleeptime", + "description": "If set to True, memory management will move to a background agent thread." + }, + "last_run_completion": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Last Run Completion", + "description": "The timestamp when the agent last completed a run." + }, + "last_run_duration_ms": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Last Run Duration Ms", + "description": "The duration in milliseconds of the agent's last run." + }, + "last_stop_reason": { + "anyOf": [ + { + "$ref": "#/components/schemas/StopReasonType" + }, + { + "type": "null" + } + ], + "description": "The stop reason from the agent's last run." 
+ }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The timezone of the agent (IANA format)." + }, + "max_files_open": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Files Open", + "description": "Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "per_file_view_window_char_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Per File View Window Char Limit", + "description": "The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "If set to True, the agent will be hidden." + } + }, + "type": "object", + "title": "UpdateAgent" + }, + "UpdateAssistantMessage": { + "properties": { + "message_type": { + "type": "string", + "const": "assistant_message", + "title": "Message Type", + "default": "assistant_message" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaAssistantMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the assistant (can be a string or an array of content parts)" + } + }, + "type": "object", + "required": ["content"], + "title": "UpdateAssistantMessage" + }, + "UpdateConversation": { + "properties": { + "summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Summary", + "description": "A summary of the conversation." + } + }, + "type": "object", + "title": "UpdateConversation", + "description": "Request model for updating a conversation." 
+ }, + "UpdateMCPServerRequest": { + "properties": { + "server_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Name", + "description": "The name of the MCP server" + }, + "config": { + "oneOf": [ + { + "$ref": "#/components/schemas/letta__schemas__mcp_server__UpdateStdioMCPServer" + }, + { + "$ref": "#/components/schemas/letta__schemas__mcp_server__UpdateSSEMCPServer" + }, + { + "$ref": "#/components/schemas/letta__schemas__mcp_server__UpdateStreamableHTTPMCPServer" + } + ], + "title": "Config", + "description": "The MCP server configuration updates (Stdio, SSE, or Streamable HTTP)", + "discriminator": { + "propertyName": "mcp_server_type", + "mapping": { + "sse": "#/components/schemas/letta__schemas__mcp_server__UpdateSSEMCPServer", + "stdio": "#/components/schemas/letta__schemas__mcp_server__UpdateStdioMCPServer", + "streamable_http": "#/components/schemas/letta__schemas__mcp_server__UpdateStreamableHTTPMCPServer" + } + } + } + }, + "additionalProperties": false, + "type": "object", + "required": ["config"], + "title": "UpdateMCPServerRequest", + "description": "Request to update an existing MCP server configuration." 
+ }, + "UpdateReasoningMessage": { + "properties": { + "reasoning": { + "type": "string", + "title": "Reasoning" + }, + "message_type": { + "type": "string", + "const": "reasoning_message", + "title": "Message Type", + "default": "reasoning_message" + } + }, + "type": "object", + "required": ["reasoning"], + "title": "UpdateReasoningMessage" + }, + "UpdateSystemMessage": { + "properties": { + "message_type": { + "type": "string", + "const": "system_message", + "title": "Message Type", + "default": "system_message" + }, + "content": { + "type": "string", + "title": "Content", + "description": "The message content sent by the system (can be a string or an array of multi-modal content parts)" + } + }, + "type": "object", + "required": ["content"], + "title": "UpdateSystemMessage" + }, + "UpdateUserMessage": { + "properties": { + "message_type": { + "type": "string", + "const": "user_message", + "title": "Message Type", + "default": "user_message" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaUserMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the user (can be a string or an array of multi-modal content parts)" + } + }, + "type": "object", + "required": ["content"], + "title": "UpdateUserMessage" + }, + "UrlImage": { + "properties": { + "type": { + "type": "string", + "const": "url", + "title": "Type", + "description": "The source type for the image.", + "default": "url" + }, + "url": { + "type": "string", + "title": "Url", + "description": "The URL of the image." 
+ } + }, + "type": "object", + "required": ["url"], + "title": "UrlImage" + }, + "UsageStatistics": { + "properties": { + "completion_tokens": { + "type": "integer", + "title": "Completion Tokens", + "default": 0 + }, + "prompt_tokens": { + "type": "integer", + "title": "Prompt Tokens", + "default": 0 + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens", + "default": 0 + }, + "prompt_tokens_details": { + "anyOf": [ + { + "$ref": "#/components/schemas/UsageStatisticsPromptTokenDetails" + }, + { + "type": "null" + } + ] + }, + "completion_tokens_details": { + "anyOf": [ + { + "$ref": "#/components/schemas/UsageStatisticsCompletionTokenDetails" + }, + { + "type": "null" + } + ] + } + }, + "type": "object", + "title": "UsageStatistics" + }, + "UsageStatisticsCompletionTokenDetails": { + "properties": { + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Reasoning Tokens" + } + }, + "type": "object", + "title": "UsageStatisticsCompletionTokenDetails" + }, + "UsageStatisticsPromptTokenDetails": { + "properties": { + "cached_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cached Tokens" + }, + "cache_read_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cache Read Tokens" + }, + "cache_creation_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cache Creation Tokens" + } + }, + "type": "object", + "title": "UsageStatisticsPromptTokenDetails" + }, + "User": { + "properties": { + "id": { + "type": "string", + "pattern": "^user-[a-fA-F0-9]{8}", + "title": "Id", + "description": "The human-friendly ID of the User", + "examples": ["user-123e4567-e89b-12d3-a456-426614174000"] + }, + "name": { + "type": "string", + "title": "Name", + "description": "The name of the user." 
+ },
+ "created_at": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Created At",
+ "description": "The creation date of the user."
+ },
+ "updated_at": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Updated At",
+ "description": "The update date of the user."
+ },
+ "is_deleted": {
+ "type": "boolean",
+ "title": "Is Deleted",
+ "description": "Whether this user is deleted or not.",
+ "default": false
+ }
+ },
+ "additionalProperties": false,
+ "type": "object",
+ "required": ["name"],
+ "title": "User",
+ "description": "Representation of a user."
+ },
+ "UserCreate": {
+ "properties": {
+ "name": {
+ "type": "string",
+ "title": "Name",
+ "description": "The name of the user."
+ }
+ },
+ "additionalProperties": false,
+ "type": "object",
+ "required": ["name"],
+ "title": "UserCreate"
+ },
+ "UserMessage": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "title": "Id"
+ },
+ "date": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Date"
+ },
+ "name": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Name"
+ },
+ "message_type": {
+ "type": "string",
+ "const": "user_message",
+ "title": "Message Type",
+ "description": "The type of the message.",
+ "default": "user_message"
+ },
+ "otid": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Otid"
+ },
+ "sender_id": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Sender Id"
+ },
+ "step_id": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Step Id"
+ },
+ "is_err": {
+ "anyOf": [
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Is Err"
+ },
+ "seq_id": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Seq Id"
+ },
+ "run_id": {
+ "anyOf": [
+ {
+ "type": "string"
+ }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaUserMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the user (can be a string or an array of multi-modal content parts)" + } + }, + "type": "object", + "required": ["id", "date", "content"], + "title": "UserMessage", + "description": "A message sent by the user. Never streamed back on a response, only used for cursor pagination.\n\nArgs:\n id (str): The ID of the message\n date (datetime): The date the message was created in ISO format\n name (Optional[str]): The name of the sender of the message\n content (Union[str, List[LettaUserMessageContentUnion]]): The message content sent by the user (can be a string or an array of multi-modal content parts)" + }, + "UserMessageListResult": { + "properties": { + "message_type": { + "type": "string", + "const": "user_message", + "title": "Message Type", + "default": "user_message" + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaUserMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The message content sent by the user (can be a string or an array of multi-modal content parts)" + }, + "message_id": { + "type": "string", + "title": "Message Id", + "description": "The unique identifier of the message." + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent that owns the message." + }, + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The unique identifier of the conversation that the message belongs to." 
+ }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The time the message was created in ISO format." + } + }, + "type": "object", + "required": ["content", "message_id", "created_at"], + "title": "UserMessageListResult", + "description": "User message list result with agent context.\n\nShape is identical to UpdateUserMessage but includes the owning agent_id and message id." + }, + "UserUpdate": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the user to update." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The new name of the user." + } + }, + "additionalProperties": false, + "type": "object", + "required": ["id"], + "title": "UserUpdate" + }, + "ValidationError": { + "properties": { + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "type": "array", + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + }, + "type": "object", + "required": ["loc", "msg", "type"], + "title": "ValidationError" + }, + "VectorDBProvider": { + "type": "string", + "enum": ["native", "tpuf", "pinecone"], + "title": "VectorDBProvider", + "description": "Supported vector database providers for archival memory" + }, + "VoiceSleeptimeManager": { + "properties": { + "manager_type": { + "type": "string", + "const": "voice_sleeptime", + "title": "Manager Type", + "description": "", + "default": "voice_sleeptime" + }, + "manager_agent_id": { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "title": "Manager Agent Id", + "description": "", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + "max_message_buffer_length": { + "anyOf": [ + { + "type": 
"integer" + }, + { + "type": "null" + } + ], + "title": "Max Message Buffer Length", + "description": "The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving." + }, + "min_message_buffer_length": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Min Message Buffer Length", + "description": "The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving." + } + }, + "type": "object", + "required": ["manager_agent_id"], + "title": "VoiceSleeptimeManager" + }, + "VoiceSleeptimeManagerUpdate": { + "properties": { + "manager_type": { + "type": "string", + "const": "voice_sleeptime", + "title": "Manager Type", + "description": "", + "default": "voice_sleeptime" + }, + "manager_agent_id": { + "anyOf": [ + { + "type": "string", + "maxLength": 42, + "minLength": 42, + "pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + "description": "The ID of the agent in the format 'agent-'", + "examples": ["agent-123e4567-e89b-42d3-8456-426614174000"] + }, + { + "type": "null" + } + ], + "title": "Manager Agent Id", + "description": "" + }, + "max_message_buffer_length": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Message Buffer Length", + "description": "The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving." + }, + "min_message_buffer_length": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Min Message Buffer Length", + "description": "The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving." 
+ } + }, + "type": "object", + "title": "VoiceSleeptimeManagerUpdate" + }, + "XAIModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "xai", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "xai" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "XAIModelSettings", + "description": "xAI model configuration (OpenAI-compatible)." 
+ }, + "ZAIModelSettings": { + "properties": { + "max_output_tokens": { + "type": "integer", + "title": "Max Output Tokens", + "description": "The maximum number of tokens the model can generate.", + "default": 4096 + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "description": "Whether to enable parallel tool calling.", + "default": false + }, + "provider_type": { + "type": "string", + "const": "zai", + "title": "Provider Type", + "description": "The type of the provider.", + "default": "zai" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature of the model.", + "default": 0.7 + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model." + } + }, + "type": "object", + "title": "ZAIModelSettings", + "description": "Z.ai (ZhipuAI) model configuration (OpenAI-compatible)." + }, + "letta__schemas__agent_file__AgentSchema": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the agent." + }, + "memory_blocks": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/CreateBlock" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Memory Blocks", + "description": "The blocks to create in the agent's in-context memory." 
+ }, + "tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools", + "description": "The tools used by the agent." + }, + "tool_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Ids", + "description": "The ids of the tools used by the agent." + }, + "source_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Source Ids", + "description": "Deprecated: Use `folder_ids` field instead. The ids of the sources used by the agent.", + "deprecated": true + }, + "folder_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Folder Ids", + "description": "The ids of the folders used by the agent." + }, + "block_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Block Ids", + "description": "The ids of the blocks used by the agent." 
+ }, + "tool_rules": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/ChildToolRule" + }, + { + "$ref": "#/components/schemas/InitToolRule" + }, + { + "$ref": "#/components/schemas/TerminalToolRule" + }, + { + "$ref": "#/components/schemas/ConditionalToolRule" + }, + { + "$ref": "#/components/schemas/ContinueToolRule" + }, + { + "$ref": "#/components/schemas/RequiredBeforeExitToolRule" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRule" + }, + { + "$ref": "#/components/schemas/ParentToolRule" + }, + { + "$ref": "#/components/schemas/RequiresApprovalToolRule" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "conditional": "#/components/schemas/ConditionalToolRule", + "constrain_child_tools": "#/components/schemas/ChildToolRule", + "continue_loop": "#/components/schemas/ContinueToolRule", + "exit_loop": "#/components/schemas/TerminalToolRule", + "max_count_per_step": "#/components/schemas/MaxCountPerStepToolRule", + "parent_last_tool": "#/components/schemas/ParentToolRule", + "required_before_exit": "#/components/schemas/RequiredBeforeExitToolRule", + "requires_approval": "#/components/schemas/RequiresApprovalToolRule", + "run_first": "#/components/schemas/InitToolRule" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Rules", + "description": "The tool rules governing the agent." + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tags", + "description": "The tags associated with the agent." + }, + "system": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System", + "description": "The system prompt used by the agent." + }, + "agent_type": { + "$ref": "#/components/schemas/AgentType", + "description": "The type of agent." 
+ }, + "initial_message_sequence": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/MessageCreate" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Initial Message Sequence", + "description": "The initial set of messages to put in the agent's in-context memory." + }, + "include_base_tools": { + "type": "boolean", + "title": "Include Base Tools", + "description": "If true, attaches the Letta core tools (e.g. core_memory related functions).", + "default": true + }, + "include_multi_agent_tools": { + "type": "boolean", + "title": "Include Multi Agent Tools", + "description": "If true, attaches the Letta multi-agent tools (e.g. sending a message to another agent).", + "default": false + }, + "include_base_tool_rules": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Base Tool Rules", + "description": "If true, attaches the Letta base tool rules (e.g. deny all tools not explicitly allowed)." + }, + "include_default_source": { + "type": "boolean", + "title": "Include Default Source", + "description": "If true, automatically creates and attaches a default data source for this agent.", + "default": false, + "deprecated": true + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the agent." + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "The metadata of the agent." + }, + "llm_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/LLMConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `model` field instead. 
The LLM configuration used by the agent.", + "deprecated": true + }, + "embedding_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + { + "type": "null" + } + ], + "description": "Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", + "deprecated": true + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/ZAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + }, + { + "$ref": "#/components/schemas/OpenRouterModelSettings" + }, + { + "$ref": "#/components/schemas/ChatGPTOAuthModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "chatgpt_oauth": "#/components/schemas/ChatGPTOAuthModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + 
"google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "openrouter": "#/components/schemas/OpenRouterModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings", + "zai": "#/components/schemas/ZAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings for the agent." + }, + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "The compaction settings configuration used for compaction." + }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit used by the agent." + }, + "embedding_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Embedding Chunk Size", + "description": "Deprecated: No longer used. The embedding chunk size used by the agent.", + "default": 300, + "deprecated": true + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.", + "deprecated": true + }, + "max_reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Reasoning Tokens", + "description": "Deprecated: Use `model` field to configure reasoning tokens instead. 
The maximum number of tokens to generate for reasoning step.", + "deprecated": true + }, + "enable_reasoner": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Reasoner", + "description": "Deprecated: Use `model` field to configure reasoning instead. Whether to enable internal extended thinking step for a reasoner model.", + "default": true, + "deprecated": true + }, + "reasoning": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Reasoning", + "description": "Deprecated: Use `model` field to configure reasoning instead. Whether to enable reasoning for this agent.", + "deprecated": true + }, + "from_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "From Template", + "description": "Deprecated: please use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "template": { + "type": "boolean", + "title": "Template", + "description": "Deprecated: No longer used.", + "default": false, + "deprecated": true + }, + "project": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project", + "description": "Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the SDK, this can be done via the x_project parameter.", + "deprecated": true + }, + "tool_exec_environment_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Tool Exec Environment Variables", + "description": "Deprecated: Use `secrets` field instead. Environment variables for tool execution.", + "deprecated": true + }, + "secrets": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Secrets", + "description": "The environment variables for tool execution specific to this agent." 
+ }, + "memory_variables": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Memory Variables", + "description": "Deprecated: Only relevant for creating agents from a template. Use the 'create agents from a template' endpoint instead.", + "deprecated": true + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "Deprecated: No longer used. The id of the project the agent belongs to.", + "deprecated": true + }, + "template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Template Id", + "description": "Deprecated: No longer used. The id of the template the agent belongs to.", + "deprecated": true + }, + "base_template_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Base Template Id", + "description": "Deprecated: No longer used. The base template id of the agent.", + "deprecated": true + }, + "identity_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Identity Ids", + "description": "The ids of the identities associated with this agent." + }, + "message_buffer_autoclear": { + "type": "boolean", + "title": "Message Buffer Autoclear", + "description": "If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.", + "default": false + }, + "enable_sleeptime": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Sleeptime", + "description": "If set to True, memory management will move to a background agent thread." 
+ }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The timezone of the agent (IANA format)." + }, + "max_files_open": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Files Open", + "description": "Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "per_file_view_window_char_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Per File View Window Char Limit", + "description": "The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent." + }, + "hidden": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Hidden", + "description": "Deprecated: No longer used. If set to True, the agent will be hidden.", + "deprecated": true + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. 
If set to True, enables parallel tool calling.", + "deprecated": true + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this agent in the file" + }, + "in_context_message_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "In Context Message Ids", + "description": "List of message IDs that are currently in the agent's context" + }, + "messages": { + "items": { + "$ref": "#/components/schemas/letta__schemas__agent_file__MessageSchema" + }, + "type": "array", + "title": "Messages", + "description": "List of messages in the agent's conversation history" + }, + "files_agents": { + "items": { + "$ref": "#/components/schemas/FileAgentSchema" + }, + "type": "array", + "title": "Files Agents", + "description": "List of file-agent relationships for this agent" + }, + "group_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Group Ids", + "description": "List of groups that the agent manages" + } + }, + "type": "object", + "required": ["id"], + "title": "AgentSchema", + "description": "Agent with human-readable ID for agent file" + }, + "letta__schemas__agent_file__MessageSchema": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "message" + }, + { + "type": "null" + } + ], + "title": "Type", + "description": "The message type to be created.", + "default": "message" + }, + "role": { + "$ref": "#/components/schemas/MessageRole", + "description": "The role of the participant." + }, + "content": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaMessageContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Content", + "description": "The content of the message." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the participant." 
+ }, + "otid": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Otid", + "description": "The offline threading id associated with this message" + }, + "sender_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sender Id", + "description": "The id of the sender of the message, can be an identity id or agent id" + }, + "batch_item_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Batch Item Id", + "description": "The id of the LLMBatchItem that this message is associated with" + }, + "group_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Group Id", + "description": "The multi-agent group that the message was sent in" + }, + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this message in the file" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model used to make the function call" + }, + "agent_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Agent Id", + "description": "The unique identifier of the agent" + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionMessageFunctionToolCall-Input" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Calls", + "description": "The list of tool calls requested. Only applicable for role assistant." + }, + "tool_call_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tool Call Id", + "description": "The ID of the tool call. Only applicable for role tool." 
+ }, + "tool_returns": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/letta__schemas__message__ToolReturn-Input" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Returns", + "description": "Tool execution return information for prior tool calls" + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "The timestamp when the object was created." + }, + "approve": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Approve", + "description": "Whether the tool has been approved" + }, + "approval_request_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Approval Request Id", + "description": "The message ID of the approval request" + }, + "denial_reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Denial Reason", + "description": "An optional explanation for the provided approval status" + }, + "approvals": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ApprovalReturn" + }, + { + "$ref": "#/components/schemas/letta__schemas__message__ToolReturn-Input" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Approvals", + "description": "Approval returns for the message" + } + }, + "type": "object", + "required": ["role", "content", "id"], + "title": "MessageSchema", + "description": "Message with human-readable ID for agent file" + }, + "letta__schemas__agent_file__ToolSchema": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "Human-readable identifier for this tool in the file" + }, + "tool_type": { + "$ref": "#/components/schemas/ToolType", + "description": "The type of the tool.", + "default": "custom" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "The description of the tool." 
+ }, + "source_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Type", + "description": "The type of the source code." + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name", + "description": "The name of the function." + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "description": "Metadata tags.", + "default": [] + }, + "source_code": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Code", + "description": "The source code of the function." + }, + "json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Json Schema", + "description": "The JSON schema of the function." + }, + "args_json_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Args Json Schema", + "description": "The args JSON schema of the function." + }, + "return_char_limit": { + "type": "integer", + "maximum": 1000000, + "minimum": 1, + "title": "Return Char Limit", + "description": "The maximum number of characters in the response.", + "default": 50000 + }, + "pip_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/PipRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Pip Requirements", + "description": "Optional list of pip packages required by this tool." + }, + "npm_requirements": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/NpmRequirement" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Npm Requirements", + "description": "Optional list of npm packages required by this tool." 
+ }, + "default_requires_approval": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Default Requires Approval", + "description": "Default value for whether or not executing this tool requires approval." + }, + "enable_parallel_execution": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Parallel Execution", + "description": "If set to True, then this tool will potentially be executed concurrently with other tools. Default False.", + "default": false + }, + "created_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created By Id", + "description": "The id of the user that made this Tool." + }, + "last_updated_by_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Updated By Id", + "description": "The id of the user that made this Tool." + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata", + "description": "A dictionary of additional metadata for the tool." + }, + "project_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Project Id", + "description": "The project id of the tool." 
+ } + }, + "additionalProperties": false, + "type": "object", + "required": ["id"], + "title": "ToolSchema", + "description": "Tool with human-readable ID for agent file" + }, + "letta__schemas__letta_message__ToolReturn": { + "properties": { + "type": { + "type": "string", + "const": "tool", + "title": "Type", + "description": "The message type to be created.", + "default": "tool" + }, + "tool_return": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/LettaToolReturnContentUnion" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "title": "Tool Return", + "description": "The tool return value - either a string or list of content parts (text/image)" + }, + "status": { + "type": "string", + "enum": ["success", "error"], + "title": "Status" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id" + }, + "stdout": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stdout" + }, + "stderr": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stderr" + } + }, + "type": "object", + "required": ["tool_return", "status", "tool_call_id"], + "title": "ToolReturn" + }, + "letta__schemas__mcp__UpdateSSEMCPServer": { + "properties": { + "server_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Name", + "description": "The name of the MCP server" + }, + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Url", + "description": "The URL of the server (MCP SSE client will connect to this URL)" + }, + "token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Token", + "description": "The access token or API key for the MCP server (used for SSE authentication)" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": 
"object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom authentication headers as key-value pairs" + } + }, + "additionalProperties": false, + "type": "object", + "title": "UpdateSSEMCPServer", + "description": "Update an SSE MCP server" + }, + "letta__schemas__mcp__UpdateStdioMCPServer": { + "properties": { + "server_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Name", + "description": "The name of the MCP server" + }, + "stdio_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/StdioServerConfig" + }, + { + "type": "null" + } + ], + "description": "The configuration for the server (MCP 'local' client will run this command)" + } + }, + "additionalProperties": false, + "type": "object", + "title": "UpdateStdioMCPServer", + "description": "Update a Stdio MCP server" + }, + "letta__schemas__mcp__UpdateStreamableHTTPMCPServer": { + "properties": { + "server_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Name", + "description": "The name of the MCP server" + }, + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Url", + "description": "The URL path for the streamable HTTP server (e.g., 'example/mcp')" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom authentication headers as key-value pairs" + } + }, + "additionalProperties": false, + 
"type": "object", + "title": "UpdateStreamableHTTPMCPServer", + "description": "Update a Streamable HTTP MCP server" + }, + "letta__schemas__mcp_server__ToolExecuteRequest": { + "properties": { + "args": { + "additionalProperties": true, + "type": "object", + "title": "Args", + "description": "Arguments to pass to the tool" + } + }, + "additionalProperties": false, + "type": "object", + "title": "ToolExecuteRequest", + "description": "Request to execute a tool." + }, + "letta__schemas__mcp_server__UpdateSSEMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "sse", + "title": "Mcp Server Type", + "default": "sse" + }, + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url"], + "title": "UpdateSSEMCPServer", + "description": "Update schema for SSE MCP server - all fields optional" + }, + "letta__schemas__mcp_server__UpdateStdioMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "stdio", + "title": "Mcp Server Type", + "default": "stdio" + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Command", + "description": "The command to run (MCP 'local' 
client will run this command)" + }, + "args": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Args", + "description": "The arguments to pass to the command" + }, + "env": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Env", + "description": "Environment variables to set" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["command", "args"], + "title": "UpdateStdioMCPServer", + "description": "Update schema for Stdio MCP server - all fields optional" + }, + "letta__schemas__mcp_server__UpdateStreamableHTTPMCPServer": { + "properties": { + "mcp_server_type": { + "type": "string", + "const": "streamable_http", + "title": "Mcp Server Type", + "default": "streamable_http" + }, + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Server Url", + "description": "The URL of the server" + }, + "auth_header": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Header", + "description": "The name of the authentication header (e.g., 'Authorization')" + }, + "auth_token": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auth Token", + "description": "The authentication token or API key value" + }, + "custom_headers": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Custom Headers", + "description": "Custom HTTP headers to include with requests" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["server_url"], + "title": "UpdateStreamableHTTPMCPServer", + "description": "Update schema for Streamable HTTP MCP server - all fields optional" + }, + "letta__schemas__message__ToolReturn-Input": { + "properties": { + "tool_call_id": { + "anyOf": [ + {}, + { + "type": 
"null" + } + ], + "title": "Tool Call Id", + "description": "The ID for the tool call" + }, + "status": { + "type": "string", + "enum": ["success", "error"], + "title": "Status", + "description": "The status of the tool call" + }, + "stdout": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stdout", + "description": "Captured stdout (e.g. prints, logs) from the tool invocation" + }, + "stderr": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stderr", + "description": "Captured stderr from the tool invocation" + }, + "func_response": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "text": "#/components/schemas/TextContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Func Response", + "description": "The function response - either a string or list of content parts (text/image)" + } + }, + "type": "object", + "required": ["status"], + "title": "ToolReturn" + }, + "letta__schemas__message__ToolReturn-Output": { + "properties": { + "tool_call_id": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Tool Call Id", + "description": "The ID for the tool call" + }, + "status": { + "type": "string", + "enum": ["success", "error"], + "title": "Status", + "description": "The status of the tool call" + }, + "stdout": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stdout", + "description": "Captured stdout (e.g. 
prints, logs) from the tool invocation" + }, + "stderr": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stderr", + "description": "Captured stderr from the tool invocation" + }, + "func_response": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "image": "#/components/schemas/ImageContent", + "text": "#/components/schemas/TextContent" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Func Response", + "description": "The function response - either a string or list of content parts (text/image)" + } + }, + "type": "object", + "required": ["status"], + "title": "ToolReturn" + }, + "letta__serialize_schemas__pydantic_agent_schema__AgentSchema": { + "properties": { + "agent_type": { + "type": "string", + "title": "Agent Type" + }, + "core_memory": { + "items": { + "$ref": "#/components/schemas/CoreMemoryBlockSchema" + }, + "type": "array", + "title": "Core Memory" + }, + "created_at": { + "type": "string", + "title": "Created At" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "embedding_config": { + "$ref": "#/components/schemas/EmbeddingConfig" + }, + "llm_config": { + "$ref": "#/components/schemas/LLMConfig" + }, + "message_buffer_autoclear": { + "type": "boolean", + "title": "Message Buffer Autoclear" + }, + "in_context_message_indices": { + "items": { + "type": "integer" + }, + "type": "array", + "title": "In Context Message Indices" + }, + "messages": { + "items": { + "$ref": "#/components/schemas/letta__serialize_schemas__pydantic_agent_schema__MessageSchema" + }, + "type": "array", + "title": "Messages" + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": 
"object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + }, + "multi_agent_group": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Multi Agent Group" + }, + "name": { + "type": "string", + "title": "Name" + }, + "system": { + "type": "string", + "title": "System" + }, + "tags": { + "items": { + "$ref": "#/components/schemas/TagSchema" + }, + "type": "array", + "title": "Tags" + }, + "tool_exec_environment_variables": { + "items": { + "$ref": "#/components/schemas/ToolEnvVarSchema" + }, + "type": "array", + "title": "Tool Exec Environment Variables" + }, + "tool_rules": { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/BaseToolRuleSchema" + }, + { + "$ref": "#/components/schemas/ChildToolRuleSchema" + }, + { + "$ref": "#/components/schemas/MaxCountPerStepToolRuleSchema" + }, + { + "$ref": "#/components/schemas/ConditionalToolRuleSchema" + } + ] + }, + "type": "array", + "title": "Tool Rules" + }, + "tools": { + "items": { + "$ref": "#/components/schemas/letta__serialize_schemas__pydantic_agent_schema__ToolSchema" + }, + "type": "array", + "title": "Tools" + }, + "updated_at": { + "type": "string", + "title": "Updated At" + }, + "version": { + "type": "string", + "title": "Version" + } + }, + "type": "object", + "required": [ + "agent_type", + "core_memory", + "created_at", + "description", + "embedding_config", + "llm_config", + "message_buffer_autoclear", + "in_context_message_indices", + "messages", + "multi_agent_group", + "name", + "system", + "tags", + "tool_exec_environment_variables", + "tool_rules", + "tools", + "updated_at", + "version" + ], + "title": "AgentSchema" + }, + "letta__serialize_schemas__pydantic_agent_schema__MessageSchema": { + "properties": { + "created_at": { + "type": "string", + "title": "Created At" + }, + "group_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Group Id" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], 
+ "title": "Model" + }, + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "role": { + "type": "string", + "title": "Role" + }, + "content": { + "items": { + "$ref": "#/components/schemas/LettaMessageContentUnion" + }, + "type": "array", + "title": "Content" + }, + "tool_call_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tool Call Id" + }, + "tool_calls": { + "items": {}, + "type": "array", + "title": "Tool Calls" + }, + "tool_returns": { + "items": {}, + "type": "array", + "title": "Tool Returns" + }, + "updated_at": { + "type": "string", + "title": "Updated At" + } + }, + "type": "object", + "required": [ + "created_at", + "group_id", + "model", + "name", + "role", + "content", + "tool_call_id", + "tool_calls", + "tool_returns", + "updated_at" + ], + "title": "MessageSchema" + }, + "letta__serialize_schemas__pydantic_agent_schema__ToolSchema": { + "properties": { + "args_json_schema": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Args Json Schema" + }, + "created_at": { + "type": "string", + "title": "Created At" + }, + "description": { + "type": "string", + "title": "Description" + }, + "json_schema": { + "$ref": "#/components/schemas/ToolJSONSchema" + }, + "name": { + "type": "string", + "title": "Name" + }, + "return_char_limit": { + "type": "integer", + "title": "Return Char Limit" + }, + "source_code": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Code" + }, + "source_type": { + "type": "string", + "title": "Source Type" + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags" + }, + "tool_type": { + "type": "string", + "title": "Tool Type" + }, + "updated_at": { + "type": "string", + "title": "Updated At" + }, + "metadata_": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + } + }, + 
"type": "object", + "required": [ + "args_json_schema", + "created_at", + "description", + "json_schema", + "name", + "return_char_limit", + "source_code", + "source_type", + "tags", + "tool_type", + "updated_at" + ], + "title": "ToolSchema" + }, + "letta__server__rest_api__routers__v1__tools__ToolExecuteRequest": { + "properties": { + "args": { + "additionalProperties": true, + "type": "object", + "title": "Args", + "description": "Arguments to pass to the tool" + } + }, + "type": "object", + "title": "ToolExecuteRequest" + }, + "openai__types__chat__chat_completion_message_function_tool_call__Function": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "additionalProperties": true, + "type": "object", + "required": ["arguments", "name"], + "title": "Function", + "description": "The function that the model called." + }, + "openai__types__chat__chat_completion_message_function_tool_call_param__Function": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": ["arguments", "name"], + "title": "Function", + "description": "The function that the model called." 
+ }, + "LettaMessageUnion": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ReasoningMessage" + }, + { + "$ref": "#/components/schemas/HiddenReasoningMessage" + }, + { + "$ref": "#/components/schemas/ToolCallMessage" + }, + { + "$ref": "#/components/schemas/ToolReturnMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ApprovalRequestMessage" + }, + { + "$ref": "#/components/schemas/ApprovalResponseMessage" + }, + { + "$ref": "#/components/schemas/SummaryMessage" + }, + { + "$ref": "#/components/schemas/EventMessage" + } + ], + "discriminator": { + "propertyName": "message_type", + "mapping": { + "system_message": "#/components/schemas/SystemMessage", + "user_message": "#/components/schemas/UserMessage", + "reasoning_message": "#/components/schemas/ReasoningMessage", + "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage", + "tool_call_message": "#/components/schemas/ToolCallMessage", + "tool_return_message": "#/components/schemas/ToolReturnMessage", + "assistant_message": "#/components/schemas/AssistantMessage", + "approval_request_message": "#/components/schemas/ApprovalRequestMessage", + "approval_response_message": "#/components/schemas/ApprovalResponseMessage", + "summary": "#/components/schemas/SummaryMessage", + "event": "#/components/schemas/EventMessage" + } + } + }, + "LettaMessageContentUnion": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + }, + { + "$ref": "#/components/schemas/ToolCallContent" + }, + { + "$ref": "#/components/schemas/ToolReturnContent" + }, + { + "$ref": "#/components/schemas/ReasoningContent" + }, + { + "$ref": "#/components/schemas/RedactedReasoningContent" + }, + { + "$ref": "#/components/schemas/OmittedReasoningContent" + } + ], + "discriminator": { + "propertyName": 
"type", + "mapping": { + "text": "#/components/schemas/TextContent", + "image": "#/components/schemas/ImageContent", + "tool_call": "#/components/schemas/ToolCallContent", + "tool_return": "#/components/schemas/ToolCallContent", + "reasoning": "#/components/schemas/ReasoningContent", + "redacted_reasoning": "#/components/schemas/RedactedReasoningContent", + "omitted_reasoning": "#/components/schemas/OmittedReasoningContent" + } + } + }, + "LettaAssistantMessageContentUnion": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextContent" + } + } + }, + "LettaToolReturnContentUnion": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextContent", + "image": "#/components/schemas/ImageContent" + } + } + }, + "LettaUserMessageContentUnion": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContent" + }, + { + "$ref": "#/components/schemas/ImageContent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextContent", + "image": "#/components/schemas/ImageContent" + } + } + } + }, + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + } + } +} diff --git a/letta/__init__.py b/letta/__init__.py index 92224bcd..5019bbad 100644 --- a/letta/__init__.py +++ b/letta/__init__.py @@ -5,7 +5,7 @@ try: __version__ = version("letta") except PackageNotFoundError: # Fallback for development installations - __version__ = "0.16.3" + __version__ = "0.16.4" if os.environ.get("LETTA_VERSION"): __version__ = os.environ["LETTA_VERSION"] diff --git a/letta/adapters/letta_llm_adapter.py b/letta/adapters/letta_llm_adapter.py index b782fa3d..b00a8edb 100644 --- a/letta/adapters/letta_llm_adapter.py +++ 
b/letta/adapters/letta_llm_adapter.py @@ -27,12 +27,16 @@ class LettaLLMAdapter(ABC): agent_id: str | None = None, agent_tags: list[str] | None = None, run_id: str | None = None, + org_id: str | None = None, + user_id: str | None = None, ) -> None: self.llm_client: LLMClientBase = llm_client self.llm_config: LLMConfig = llm_config self.agent_id: str | None = agent_id self.agent_tags: list[str] | None = agent_tags self.run_id: str | None = run_id + self.org_id: str | None = org_id + self.user_id: str | None = user_id self.message_id: str | None = None self.request_data: dict | None = None self.response_data: dict | None = None diff --git a/letta/adapters/letta_llm_request_adapter.py b/letta/adapters/letta_llm_request_adapter.py index 7635d424..5e472a35 100644 --- a/letta/adapters/letta_llm_request_adapter.py +++ b/letta/adapters/letta_llm_request_adapter.py @@ -127,6 +127,9 @@ class LettaLLMRequestAdapter(LettaLLMAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, + org_id=self.org_id, + user_id=self.user_id, + llm_config=self.llm_config.model_dump() if self.llm_config else None, ), ), label="create_provider_trace", diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py index 2929b1c4..4ae64e91 100644 --- a/letta/adapters/letta_llm_stream_adapter.py +++ b/letta/adapters/letta_llm_stream_adapter.py @@ -33,8 +33,10 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): agent_id: str | None = None, agent_tags: list[str] | None = None, run_id: str | None = None, + org_id: str | None = None, + user_id: str | None = None, ) -> None: - super().__init__(llm_client, llm_config, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id) + super().__init__(llm_client, llm_config, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, org_id=org_id, user_id=user_id) self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None async def invoke_llm( @@ -60,7 +62,7 @@ class 
LettaLLMStreamAdapter(LettaLLMAdapter): self.request_data = request_data # Instantiate streaming interface - if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]: + if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock, ProviderType.minimax]: self.interface = AnthropicStreamingInterface( use_assistant_message=use_assistant_message, put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs, @@ -68,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): run_id=self.run_id, step_id=step_id, ) - elif self.llm_config.model_endpoint_type == ProviderType.openai: + elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]: # For non-v1 agents, always use Chat Completions streaming interface self.interface = OpenAIStreamingInterface( use_assistant_message=use_assistant_message, @@ -114,64 +116,9 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): # Extract reasoning content from the interface self.reasoning_content = self.interface.get_reasoning_content() - # Extract usage statistics - # Some providers don't provide usage in streaming, use fallback if needed - if hasattr(self.interface, "input_tokens") and hasattr(self.interface, "output_tokens"): - # Handle cases where tokens might not be set (e.g., LMStudio) - input_tokens = self.interface.input_tokens - output_tokens = self.interface.output_tokens - - # Fallback to estimated values if not provided - if not input_tokens and hasattr(self.interface, "fallback_input_tokens"): - input_tokens = self.interface.fallback_input_tokens - if not output_tokens and hasattr(self.interface, "fallback_output_tokens"): - output_tokens = self.interface.fallback_output_tokens - - # Extract cache token data (OpenAI/Gemini use cached_tokens, Anthropic uses cache_read_tokens) - # None means provider didn't report, 0 means provider reported 0 - cached_input_tokens = None - if hasattr(self.interface, "cached_tokens") and 
self.interface.cached_tokens is not None: - cached_input_tokens = self.interface.cached_tokens - elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens is not None: - cached_input_tokens = self.interface.cache_read_tokens - - # Extract cache write tokens (Anthropic only) - cache_write_tokens = None - if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens is not None: - cache_write_tokens = self.interface.cache_creation_tokens - - # Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens) - reasoning_tokens = None - if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens is not None: - reasoning_tokens = self.interface.reasoning_tokens - elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens is not None: - reasoning_tokens = self.interface.thinking_tokens - - # Calculate actual total input tokens - # - # ANTHROPIC: input_tokens is NON-cached only, must add cache tokens - # Total = input_tokens + cache_read_input_tokens + cache_creation_input_tokens - # - # OPENAI/GEMINI: input_tokens is already TOTAL - # cached_tokens is a subset, NOT additive - is_anthropic = hasattr(self.interface, "cache_read_tokens") or hasattr(self.interface, "cache_creation_tokens") - if is_anthropic: - actual_input_tokens = (input_tokens or 0) + (cached_input_tokens or 0) + (cache_write_tokens or 0) - else: - actual_input_tokens = input_tokens or 0 - - self.usage = LettaUsageStatistics( - step_count=1, - completion_tokens=output_tokens or 0, - prompt_tokens=actual_input_tokens, - total_tokens=actual_input_tokens + (output_tokens or 0), - cached_input_tokens=cached_input_tokens, - cache_write_tokens=cache_write_tokens, - reasoning_tokens=reasoning_tokens, - ) - else: - # Default usage statistics if not available - self.usage = LettaUsageStatistics(step_count=1, completion_tokens=0, prompt_tokens=0, total_tokens=0) + # Extract usage 
statistics from the streaming interface + self.usage = self.interface.get_usage_statistics() + self.usage.step_count = 1 # Store any additional data from the interface self.message_id = self.interface.letta_message_id @@ -236,6 +183,9 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, + org_id=self.org_id, + user_id=self.user_id, + llm_config=self.llm_config.model_dump() if self.llm_config else None, ), ), label="create_provider_trace", diff --git a/letta/adapters/simple_llm_request_adapter.py b/letta/adapters/simple_llm_request_adapter.py index 30243a9b..cf2dc741 100644 --- a/letta/adapters/simple_llm_request_adapter.py +++ b/letta/adapters/simple_llm_request_adapter.py @@ -46,6 +46,9 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter): agent_tags=self.agent_tags, run_id=self.run_id, call_type="agent_step", + org_id=self.org_id, + user_id=self.user_id, + llm_config=self.llm_config.model_dump() if self.llm_config else None, ) try: self.response_data = await self.llm_client.request_async_with_telemetry(request_data, self.llm_config) diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index c2af996c..2313ff2b 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -14,8 +14,8 @@ from letta.schemas.enums import ProviderType from letta.schemas.letta_message import LettaMessage from letta.schemas.letta_message_content import LettaMessageContentUnion from letta.schemas.provider_trace import ProviderTrace -from letta.schemas.usage import LettaUsageStatistics from letta.schemas.user import User +from letta.server.rest_api.streaming_response import get_cancellation_event_for_run from letta.settings import settings from letta.utils import safe_create_task @@ -70,8 +70,11 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): # Store request data self.request_data = request_data + # Get 
cancellation event for this run to enable graceful cancellation (before branching) + cancellation_event = get_cancellation_event_for_run(self.run_id) if self.run_id else None + # Instantiate streaming interface - if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]: + if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock, ProviderType.minimax]: # NOTE: different self.interface = SimpleAnthropicStreamingInterface( requires_approval_tools=requires_approval_tools, @@ -81,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): elif self.llm_config.model_endpoint_type in [ ProviderType.openai, ProviderType.deepseek, + ProviderType.openrouter, ProviderType.zai, ProviderType.chatgpt_oauth, ]: @@ -102,6 +106,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): requires_approval_tools=requires_approval_tools, run_id=self.run_id, step_id=step_id, + cancellation_event=cancellation_event, ) else: self.interface = SimpleOpenAIStreamingInterface( @@ -112,12 +117,14 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): model=self.llm_config.model, run_id=self.run_id, step_id=step_id, + cancellation_event=cancellation_event, ) elif self.llm_config.model_endpoint_type in [ProviderType.google_ai, ProviderType.google_vertex]: self.interface = SimpleGeminiStreamingInterface( requires_approval_tools=requires_approval_tools, run_id=self.run_id, step_id=step_id, + cancellation_event=cancellation_event, ) else: raise ValueError(f"Streaming not supported for provider {self.llm_config.model_endpoint_type}") @@ -157,68 +164,10 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): # Extract all content parts self.content: List[LettaMessageContentUnion] = self.interface.get_content() - # Extract usage statistics - # Some providers don't provide usage in streaming, use fallback if needed - if hasattr(self.interface, "input_tokens") and hasattr(self.interface, "output_tokens"): - # Handle cases where tokens 
might not be set (e.g., LMStudio) - input_tokens = self.interface.input_tokens - output_tokens = self.interface.output_tokens - - # Fallback to estimated values if not provided - if not input_tokens and hasattr(self.interface, "fallback_input_tokens"): - input_tokens = self.interface.fallback_input_tokens - if not output_tokens and hasattr(self.interface, "fallback_output_tokens"): - output_tokens = self.interface.fallback_output_tokens - - # Extract cache token data (OpenAI/Gemini use cached_tokens) - # None means provider didn't report, 0 means provider reported 0 - cached_input_tokens = None - if hasattr(self.interface, "cached_tokens") and self.interface.cached_tokens is not None: - cached_input_tokens = self.interface.cached_tokens - # Anthropic uses cache_read_tokens for cache hits - elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens is not None: - cached_input_tokens = self.interface.cache_read_tokens - - # Extract cache write tokens (Anthropic only) - # None means provider didn't report, 0 means provider reported 0 - cache_write_tokens = None - if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens is not None: - cache_write_tokens = self.interface.cache_creation_tokens - - # Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens) - # None means provider didn't report, 0 means provider reported 0 - reasoning_tokens = None - if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens is not None: - reasoning_tokens = self.interface.reasoning_tokens - elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens is not None: - reasoning_tokens = self.interface.thinking_tokens - - # Calculate actual total input tokens for context window limit checks (summarization trigger). 
- # - # ANTHROPIC: input_tokens is NON-cached only, must add cache tokens - # Total = input_tokens + cache_read_input_tokens + cache_creation_input_tokens - # - # OPENAI/GEMINI: input_tokens (prompt_tokens/prompt_token_count) is already TOTAL - # cached_tokens is a subset, NOT additive - # Total = input_tokens (don't add cached_tokens or it double-counts!) - is_anthropic = hasattr(self.interface, "cache_read_tokens") or hasattr(self.interface, "cache_creation_tokens") - if is_anthropic: - actual_input_tokens = (input_tokens or 0) + (cached_input_tokens or 0) + (cache_write_tokens or 0) - else: - actual_input_tokens = input_tokens or 0 - - self.usage = LettaUsageStatistics( - step_count=1, - completion_tokens=output_tokens or 0, - prompt_tokens=actual_input_tokens, - total_tokens=actual_input_tokens + (output_tokens or 0), - cached_input_tokens=cached_input_tokens, - cache_write_tokens=cache_write_tokens, - reasoning_tokens=reasoning_tokens, - ) - else: - # Default usage statistics if not available - self.usage = LettaUsageStatistics(step_count=1, completion_tokens=0, prompt_tokens=0, total_tokens=0) + # Extract usage statistics from the interface + # Each interface implements get_usage_statistics() with provider-specific logic + self.usage = self.interface.get_usage_statistics() + self.usage.step_count = 1 # Store any additional data from the interface self.message_id = self.interface.letta_message_id @@ -283,6 +232,9 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, + org_id=self.org_id, + user_id=self.user_id, + llm_config=self.llm_config.model_dump() if self.llm_config else None, ), ), label="create_provider_trace", diff --git a/letta/agents/helpers.py b/letta/agents/helpers.py index b17f4ae9..2ce15c0f 100644 --- a/letta/agents/helpers.py +++ b/letta/agents/helpers.py @@ -97,6 +97,25 @@ async def _prepare_in_context_messages_async( return current_in_context_messages, 
new_in_context_messages +@trace_method +def validate_persisted_tool_call_ids(tool_return_message: Message, approval_response_message: ApprovalCreate) -> bool: + persisted_tool_returns = tool_return_message.tool_returns + if not persisted_tool_returns: + return False + persisted_tool_call_ids = [tool_return.tool_call_id for tool_return in persisted_tool_returns] + + approval_responses = approval_response_message.approvals + if not approval_responses: + return False + approval_response_tool_call_ids = [approval_response.tool_call_id for approval_response in approval_responses] + + request_response_diff = set(persisted_tool_call_ids).symmetric_difference(set(approval_response_tool_call_ids)) + if request_response_diff: + return False + + return True + + @trace_method def validate_approval_tool_call_ids(approval_request_message: Message, approval_response_message: ApprovalCreate): approval_requests = approval_request_message.tool_calls @@ -227,6 +246,36 @@ async def _prepare_in_context_messages_no_persist_async( if input_messages[0].type == "approval": # User is trying to send an approval response if current_in_context_messages and current_in_context_messages[-1].role != "approval": + # No pending approval request - check if this is an idempotent retry + # Check last few messages for a tool return matching the approval's tool_call_ids + # (approved tool return should be recent, but server-side tool calls may come after it) + approval_already_processed = False + recent_messages = current_in_context_messages[-10:] # Only check last 10 messages + for msg in reversed(recent_messages): + if msg.role == "tool" and validate_persisted_tool_call_ids(msg, input_messages[0]): + logger.info( + f"Idempotency check: Found matching tool return in recent history. 
" + f"tool_returns={msg.tool_returns}, approval_response.approvals={input_messages[0].approvals}" + ) + approval_already_processed = True + break + + if approval_already_processed: + # Approval already handled, just process follow-up messages if any or manually inject keep-alive message + keep_alive_messages = input_messages[1:] or [ + MessageCreate( + role="user", + content=[ + TextContent( + text="Automated keep-alive ping. Ignore this message and continue from where you stopped." + ) + ], + ) + ] + new_in_context_messages = await create_input_messages( + input_messages=keep_alive_messages, agent_id=agent_state.id, timezone=agent_state.timezone, run_id=run_id, actor=actor + ) + return current_in_context_messages, new_in_context_messages logger.warn( f"Cannot process approval response: No tool call is currently awaiting approval. Last message: {current_in_context_messages[-1]}" ) @@ -235,7 +284,7 @@ async def _prepare_in_context_messages_no_persist_async( "Please send a regular message to interact with the agent." 
) validate_approval_tool_call_ids(current_in_context_messages[-1], input_messages[0]) - new_in_context_messages = create_approval_response_message_from_input( + new_in_context_messages = await create_approval_response_message_from_input( agent_state=agent_state, input_message=input_messages[0], run_id=run_id ) if len(input_messages) > 1: diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py index 05a2a867..3b359c72 100644 --- a/letta/agents/letta_agent.py +++ b/letta/agents/letta_agent.py @@ -218,6 +218,7 @@ class LettaAgent(BaseAgent): use_assistant_message: bool = True, request_start_timestamp_ns: int | None = None, include_return_message_types: list[MessageType] | None = None, + run_id: str | None = None, ): agent_state = await self.agent_manager.get_agent_by_id_async( agent_id=self.agent_id, @@ -330,6 +331,7 @@ class LettaAgent(BaseAgent): tool_rules_solver, agent_step_span, step_metrics, + run_id=run_id, ) in_context_messages = current_in_context_messages + new_in_context_messages @@ -418,6 +420,9 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, + llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, ), ) step_progression = StepProgression.LOGGED_TRACE @@ -549,6 +554,7 @@ class LettaAgent(BaseAgent): llm_config=agent_state.llm_config, total_tokens=usage.total_tokens, force=False, + run_id=run_id, ) await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False) @@ -677,6 +683,7 @@ class LettaAgent(BaseAgent): tool_rules_solver, agent_step_span, step_metrics, + run_id=run_id, ) in_context_messages = current_in_context_messages + new_in_context_messages @@ -766,6 +773,9 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, + 
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, ), ) step_progression = StepProgression.LOGGED_TRACE @@ -882,6 +892,7 @@ class LettaAgent(BaseAgent): llm_config=agent_state.llm_config, total_tokens=usage.total_tokens, force=False, + run_id=run_id, ) await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False) @@ -908,6 +919,7 @@ class LettaAgent(BaseAgent): use_assistant_message: bool = True, request_start_timestamp_ns: int | None = None, include_return_message_types: list[MessageType] | None = None, + run_id: str | None = None, ) -> AsyncGenerator[str, None]: """ Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens. @@ -1027,6 +1039,8 @@ class LettaAgent(BaseAgent): agent_state, llm_client, tool_rules_solver, + run_id=run_id, + step_id=step_id, ) step_progression = StepProgression.STREAM_RECEIVED @@ -1234,6 +1248,9 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, + llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, ), ) step_progression = StepProgression.LOGGED_TRACE @@ -1378,6 +1395,7 @@ class LettaAgent(BaseAgent): llm_config=agent_state.llm_config, total_tokens=usage.total_tokens, force=False, + run_id=run_id, ) await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False) @@ -1441,6 +1459,7 @@ class LettaAgent(BaseAgent): tool_rules_solver: ToolRulesSolver, agent_step_span: "Span", step_metrics: StepMetrics, + run_id: str | None = None, ) -> tuple[dict, dict, list[Message], list[Message], list[str]] | None: for attempt in range(self.max_summarization_retries + 1): try: @@ -1461,6 +1480,7 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, + step_id=step_metrics.id, 
call_type="agent_step", ) response = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config) @@ -1488,6 +1508,7 @@ class LettaAgent(BaseAgent): new_letta_messages=new_in_context_messages, llm_config=agent_state.llm_config, force=True, + run_id=run_id, ) new_in_context_messages = [] log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}") @@ -1503,6 +1524,8 @@ class LettaAgent(BaseAgent): agent_state: AgentState, llm_client: LLMClientBase, tool_rules_solver: ToolRulesSolver, + run_id: str | None = None, + step_id: str | None = None, ) -> tuple[dict, AsyncStream[ChatCompletionChunk], list[Message], list[Message], list[str], int] | None: for attempt in range(self.max_summarization_retries + 1): try: @@ -1530,6 +1553,7 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, + step_id=step_id, call_type="agent_step", ) @@ -1555,6 +1579,7 @@ class LettaAgent(BaseAgent): new_letta_messages=new_in_context_messages, llm_config=agent_state.llm_config, force=True, + run_id=run_id, ) new_in_context_messages: list[Message] = [] log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}") @@ -1568,10 +1593,17 @@ class LettaAgent(BaseAgent): new_letta_messages: list[Message], llm_config: LLMConfig, force: bool, + run_id: str | None = None, + step_id: str | None = None, ) -> list[Message]: if isinstance(e, ContextWindowExceededError): return await self._rebuild_context_window( - in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force + in_context_messages=in_context_messages, + new_letta_messages=new_letta_messages, + llm_config=llm_config, + force=force, + run_id=run_id, + step_id=step_id, ) else: raise llm_client.handle_llm_error(e) @@ -1584,6 +1616,8 @@ class LettaAgent(BaseAgent): llm_config: LLMConfig, total_tokens: int | None = None, force: bool = False, + run_id: str | None = None, + step_id: str | None = None, ) -> 
list[Message]: # If total tokens is reached, we truncate down # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc. @@ -1597,6 +1631,8 @@ class LettaAgent(BaseAgent): new_letta_messages=new_letta_messages, force=True, clear=True, + run_id=run_id, + step_id=step_id, ) else: # NOTE (Sarah): Seems like this is doing nothing? @@ -1606,6 +1642,8 @@ class LettaAgent(BaseAgent): new_in_context_messages, updated = await self.summarizer.summarize( in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, + run_id=run_id, + step_id=step_id, ) await self.agent_manager.update_message_ids_async( agent_id=self.agent_id, diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py index 14b53440..58379c78 100644 --- a/letta/agents/letta_agent_v2.py +++ b/letta/agents/letta_agent_v2.py @@ -156,7 +156,11 @@ class LettaAgentV2(BaseAgentV2): run_id=None, messages=in_context_messages + input_messages_to_persist, llm_adapter=LettaLLMRequestAdapter( - llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_tags=self.agent_state.tags + llm_client=self.llm_client, + llm_config=self.agent_state.llm_config, + agent_tags=self.agent_state.tags, + org_id=self.actor.organization_id, + user_id=self.actor.id, ), dry_run=True, enforce_run_id_set=False, @@ -213,6 +217,8 @@ class LettaAgentV2(BaseAgentV2): agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ), run_id=run_id, use_assistant_message=use_assistant_message, @@ -236,6 +242,7 @@ class LettaAgentV2(BaseAgentV2): new_letta_messages=self.response_messages, total_tokens=self.usage.total_tokens, force=False, + run_id=run_id, ) if self.stop_reason is None: @@ -297,6 +304,8 @@ class LettaAgentV2(BaseAgentV2): agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ) else: 
llm_adapter = LettaLLMRequestAdapter( @@ -305,6 +314,8 @@ class LettaAgentV2(BaseAgentV2): agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ) try: @@ -343,6 +354,7 @@ class LettaAgentV2(BaseAgentV2): new_letta_messages=self.response_messages, total_tokens=self.usage.total_tokens, force=False, + run_id=run_id, ) except: @@ -488,6 +500,8 @@ class LettaAgentV2(BaseAgentV2): in_context_messages=messages, new_letta_messages=self.response_messages, force=True, + run_id=run_id, + step_id=step_id, ) else: raise e @@ -1246,6 +1260,8 @@ class LettaAgentV2(BaseAgentV2): new_letta_messages: list[Message], total_tokens: int | None = None, force: bool = False, + run_id: str | None = None, + step_id: str | None = None, ) -> list[Message]: self.logger.warning("Running deprecated v2 summarizer. This should be removed in the future.") # always skip summarization if last message is an approval request message @@ -1268,6 +1284,8 @@ class LettaAgentV2(BaseAgentV2): new_letta_messages=new_letta_messages, force=True, clear=True, + run_id=run_id, + step_id=step_id, ) else: # NOTE (Sarah): Seems like this is doing nothing? 
@@ -1277,6 +1295,8 @@ class LettaAgentV2(BaseAgentV2): new_in_context_messages, updated = await self.summarizer.summarize( in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, + run_id=run_id, + step_id=step_id, ) except Exception as e: self.logger.error(f"Failed to summarize conversation history: {e}") diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 458d53d0..0a4ea5c2 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -41,6 +41,7 @@ from letta.schemas.step import StepProgression from letta.schemas.step_metrics import StepMetrics from letta.schemas.tool_execution_result import ToolExecutionResult from letta.schemas.usage import LettaUsageStatistics +from letta.schemas.user import User from letta.server.rest_api.utils import ( create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response, @@ -72,6 +73,16 @@ class LettaAgentV3(LettaAgentV2): * Support Gemini / OpenAI client """ + def __init__( + self, + agent_state: AgentState, + actor: User, + conversation_id: str | None = None, + ): + super().__init__(agent_state, actor) + # Set conversation_id after parent init (which calls _initialize_state) + self.conversation_id = conversation_id + def _initialize_state(self): super()._initialize_state() self._require_tool_call = False @@ -168,7 +179,13 @@ class LettaAgentV3(LettaAgentV2): input_messages_to_persist=input_messages_to_persist, # TODO need to support non-streaming adapter too llm_adapter=SimpleLLMRequestAdapter( - llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_id=self.agent_state.id, run_id=run_id + llm_client=self.llm_client, + llm_config=self.agent_state.llm_config, + agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, + run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ), run_id=run_id, # use_assistant_message=use_assistant_message, @@ -310,14 +327,20 @@ class 
LettaAgentV3(LettaAgentV2): llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ) else: llm_adapter = SimpleLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, run_id=run_id, + org_id=self.actor.organization_id, + user_id=self.actor.id, ) try: @@ -390,7 +413,9 @@ class LettaAgentV3(LettaAgentV2): self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value) except Exception as e: - self.logger.warning(f"Error during agent stream: {e}", exc_info=True) + # Use repr() if str() is empty (happens with Exception() with no args) + error_detail = str(e) or repr(e) + self.logger.warning(f"Error during agent stream: {error_detail}", exc_info=True) # Set stop_reason if not already set if self.stop_reason is None: @@ -411,7 +436,7 @@ class LettaAgentV3(LettaAgentV2): run_id=run_id, error_type="internal_error", message="An error occurred during agent execution.", - detail=str(e), + detail=error_detail, ) yield f"event: error\ndata: {error_message.model_dump_json()}\n\n" @@ -486,10 +511,11 @@ class LettaAgentV3(LettaAgentV2): new_messages: The new messages to persist in_context_messages: The current in-context messages """ - # make sure all the new messages have the correct run_id and step_id + # make sure all the new messages have the correct run_id, step_id, and conversation_id for message in new_messages: message.step_id = step_id message.run_id = run_id + message.conversation_id = self.conversation_id # persist the new message objects - ONLY place where messages are persisted persisted_messages = await self.message_manager.create_many_messages_async( @@ -653,7 +679,15 @@ class LettaAgentV3(LettaAgentV2): return step_id = approval_request.step_id - step_metrics = await 
self.step_manager.get_step_metrics_async(step_id=step_id, actor=self.actor) + if step_id is None: + # Old approval messages may not have step_id set - generate a new one + self.logger.warning(f"Approval request message {approval_request.id} has no step_id, generating new step_id") + step_id = generate_step_id() + step_progression, logged_step, step_metrics, agent_step_span = await self._step_checkpoint_start( + step_id=step_id, run_id=run_id + ) + else: + step_metrics = await self.step_manager.get_step_metrics_async(step_id=step_id, actor=self.actor) else: # Check for job cancellation at the start of each step if run_id and await self._check_run_cancellation(run_id): @@ -760,7 +794,10 @@ class LettaAgentV3(LettaAgentV2): # TODO: might want to delay this checkpoint in case of corrupated state try: summary_message, messages, _ = await self.compact( - messages, trigger_threshold=self.agent_state.llm_config.context_window + messages, + trigger_threshold=self.agent_state.llm_config.context_window, + run_id=run_id, + step_id=step_id, ) self.logger.info("Summarization succeeded, continuing to retry LLM request") continue @@ -776,7 +813,10 @@ class LettaAgentV3(LettaAgentV2): # update the messages await self._checkpoint_messages( - run_id=run_id, step_id=step_id, new_messages=[summary_message], in_context_messages=messages + run_id=run_id, + step_id=step_id, + new_messages=[summary_message], + in_context_messages=messages, ) else: @@ -879,20 +919,30 @@ class LettaAgentV3(LettaAgentV2): self.logger.info( f"Context window exceeded (current: {self.context_token_estimate}, threshold: {self.agent_state.llm_config.context_window}), trying to compact messages" ) - summary_message, messages, _ = await self.compact(messages, trigger_threshold=self.agent_state.llm_config.context_window) + summary_message, messages, _ = await self.compact( + messages, + trigger_threshold=self.agent_state.llm_config.context_window, + run_id=run_id, + step_id=step_id, + ) # TODO: persist + return the 
summary message # TODO: convert this to a SummaryMessage self.response_messages.append(summary_message) for message in Message.to_letta_messages(summary_message): yield message await self._checkpoint_messages( - run_id=run_id, step_id=step_id, new_messages=[summary_message], in_context_messages=messages + run_id=run_id, + step_id=step_id, + new_messages=[summary_message], + in_context_messages=messages, ) except Exception as e: # NOTE: message persistence does not happen in the case of an exception (rollback to previous state) - self.logger.warning(f"Error during step processing: {e}") - self.job_update_metadata = {"error": str(e)} + # Use repr() if str() is empty (happens with Exception() with no args) + error_detail = str(e) or repr(e) + self.logger.warning(f"Error during step processing: {error_detail}") + self.job_update_metadata = {"error": error_detail} # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow. if not self.stop_reason: @@ -1445,7 +1495,12 @@ class LettaAgentV3(LettaAgentV2): @trace_method async def compact( - self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None + self, + messages, + trigger_threshold: Optional[int] = None, + compaction_settings: Optional["CompactionSettings"] = None, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ) -> tuple[Message, list[Message], str]: """Compact the current in-context messages for this agent. 
@@ -1472,7 +1527,7 @@ class LettaAgentV3(LettaAgentV2): summarizer_config = CompactionSettings(model=handle) # Build the LLMConfig used for summarization - summarizer_llm_config = self._build_summarizer_llm_config( + summarizer_llm_config = await self._build_summarizer_llm_config( agent_llm_config=self.agent_state.llm_config, summarizer_config=summarizer_config, ) @@ -1484,6 +1539,10 @@ class LettaAgentV3(LettaAgentV2): llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, + agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, + run_id=run_id, + step_id=step_id, ) elif summarizer_config.mode == "sliding_window": try: @@ -1492,6 +1551,10 @@ class LettaAgentV3(LettaAgentV2): llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, + agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, + run_id=run_id, + step_id=step_id, ) except Exception as e: self.logger.error(f"Sliding window summarization failed with exception: {str(e)}. 
Falling back to all mode.") @@ -1500,6 +1563,10 @@ class LettaAgentV3(LettaAgentV2): llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, + agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, + run_id=run_id, + step_id=step_id, ) summarization_mode_used = "all" else: @@ -1533,6 +1600,10 @@ class LettaAgentV3(LettaAgentV2): llm_config=self.agent_state.llm_config, summarizer_config=summarizer_config, in_context_messages=compacted_messages, + agent_id=self.agent_state.id, + agent_tags=self.agent_state.tags, + run_id=run_id, + step_id=step_id, ) summarization_mode_used = "all" @@ -1584,8 +1655,8 @@ class LettaAgentV3(LettaAgentV2): return summary_message_obj, final_messages, summary - @staticmethod - def _build_summarizer_llm_config( + async def _build_summarizer_llm_config( + self, agent_llm_config: LLMConfig, summarizer_config: CompactionSettings, ) -> LLMConfig: @@ -1611,12 +1682,41 @@ class LettaAgentV3(LettaAgentV2): model_name = summarizer_config.model # Start from the agent's config and override model + provider_name + handle - # Note: model_endpoint_type is NOT overridden - the parsed provider_name - # is a custom label (e.g. "claude-pro-max"), not the endpoint type (e.g. 
"anthropic") - base = agent_llm_config.model_copy() - base.provider_name = provider_name - base.model = model_name - base.handle = summarizer_config.model + # Check if the summarizer's provider matches the agent's provider + # If they match, we can safely use the agent's config as a base + # If they don't match, we need to load the default config for the new provider + from letta.schemas.enums import ProviderType + + provider_matches = False + try: + # Check if provider_name is a valid ProviderType that matches agent's endpoint type + provider_type = ProviderType(provider_name) + provider_matches = provider_type.value == agent_llm_config.model_endpoint_type + except ValueError: + # provider_name is a custom label - check if it matches agent's provider_name + provider_matches = provider_name == agent_llm_config.provider_name + + if provider_matches: + # Same provider - use agent's config as base and override model/handle + base = agent_llm_config.model_copy() + base.model = model_name + base.handle = summarizer_config.model + else: + # Different provider - load default config for this handle + from letta.services.provider_manager import ProviderManager + + provider_manager = ProviderManager() + try: + base = await provider_manager.get_llm_config_from_handle( + handle=summarizer_config.model, + actor=self.actor, + ) + except Exception as e: + self.logger.warning( + f"Failed to load LLM config for summarizer handle '{summarizer_config.model}': {e}. " + f"Falling back to agent's LLM config." + ) + return agent_llm_config # If explicit model_settings are provided for the summarizer, apply # them just like server.create_agent_async does for agents. 
diff --git a/letta/constants.py b/letta/constants.py index 9d9c94fb..a6b53c6d 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -25,7 +25,7 @@ PROVIDER_ORDER = { "xai": 12, "lmstudio": 13, "zai": 14, - "openrouter": 15, # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum + "openrouter": 15, } ADMIN_PREFIX = "/v1/admin" diff --git a/letta/functions/ast_parsers.py b/letta/functions/ast_parsers.py index 627b7fdb..14eed2fa 100644 --- a/letta/functions/ast_parsers.py +++ b/letta/functions/ast_parsers.py @@ -7,8 +7,52 @@ from typing import Dict, Optional, Tuple from letta.errors import LettaToolCreateError from letta.types import JsonDict +_ALLOWED_TYPING_NAMES = {name: obj for name, obj in vars(typing).items() if not name.startswith("_")} +_ALLOWED_BUILTIN_TYPES = {name: obj for name, obj in vars(builtins).items() if isinstance(obj, type)} +_ALLOWED_TYPE_NAMES = {**_ALLOWED_TYPING_NAMES, **_ALLOWED_BUILTIN_TYPES, "typing": typing} -def resolve_type(annotation: str): + +def _resolve_annotation_node(node: ast.AST): + if isinstance(node, ast.Name): + if node.id == "None": + return type(None) + if node.id in _ALLOWED_TYPE_NAMES: + return _ALLOWED_TYPE_NAMES[node.id] + raise ValueError(f"Unsupported annotation name: {node.id}") + + if isinstance(node, ast.Attribute): + if isinstance(node.value, ast.Name) and node.value.id == "typing" and node.attr in _ALLOWED_TYPING_NAMES: + return _ALLOWED_TYPING_NAMES[node.attr] + raise ValueError("Unsupported annotation attribute") + + if isinstance(node, ast.Subscript): + origin = _resolve_annotation_node(node.value) + args = _resolve_subscript_slice(node.slice) + return origin[args] + + if isinstance(node, ast.Tuple): + return tuple(_resolve_annotation_node(elt) for elt in node.elts) + + if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr): + left = _resolve_annotation_node(node.left) + right = _resolve_annotation_node(node.right) + return left | right + + if isinstance(node, ast.Constant) and 
node.value is None: + return type(None) + + raise ValueError("Unsupported annotation expression") + + +def _resolve_subscript_slice(slice_node: ast.AST): + if isinstance(slice_node, ast.Index): + slice_node = slice_node.value + if isinstance(slice_node, ast.Tuple): + return tuple(_resolve_annotation_node(elt) for elt in slice_node.elts) + return _resolve_annotation_node(slice_node) + + +def resolve_type(annotation: str, *, allow_unsafe_eval: bool = False, extra_globals: Optional[Dict[str, object]] = None): """ Resolve a type annotation string into a Python type. Previously, primitive support for int, float, str, dict, list, set, tuple, bool. @@ -23,15 +67,23 @@ def resolve_type(annotation: str): ValueError: If the annotation is unsupported or invalid. """ python_types = {**vars(typing), **vars(builtins)} + if extra_globals: + python_types.update(extra_globals) if annotation in python_types: return python_types[annotation] try: - # Allow use of typing and builtins in a safe eval context - return eval(annotation, python_types) + parsed = ast.parse(annotation, mode="eval") + return _resolve_annotation_node(parsed.body) except Exception: - raise ValueError(f"Unsupported annotation: {annotation}") + if allow_unsafe_eval: + try: + return eval(annotation, python_types) + except Exception as exc: + raise ValueError(f"Unsupported annotation: {annotation}") from exc + + raise ValueError(f"Unsupported annotation: {annotation}") # TODO :: THIS MUST BE EDITED TO HANDLE THINGS @@ -62,14 +114,34 @@ def get_function_annotations_from_source(source_code: str, function_name: str) - # NOW json_loads -> ast.literal_eval -> typing.get_origin -def coerce_dict_args_by_annotations(function_args: JsonDict, annotations: Dict[str, str]) -> dict: +def coerce_dict_args_by_annotations( + function_args: JsonDict, + annotations: Dict[str, object], + *, + allow_unsafe_eval: bool = False, + extra_globals: Optional[Dict[str, object]] = None, +) -> dict: coerced_args = dict(function_args) # Shallow 
copy for arg_name, value in coerced_args.items(): if arg_name in annotations: annotation_str = annotations[arg_name] try: - arg_type = resolve_type(annotation_str) + annotation_value = annotations[arg_name] + if isinstance(annotation_value, str): + arg_type = resolve_type( + annotation_value, + allow_unsafe_eval=allow_unsafe_eval, + extra_globals=extra_globals, + ) + elif isinstance(annotation_value, typing.ForwardRef): + arg_type = resolve_type( + annotation_value.__forward_arg__, + allow_unsafe_eval=allow_unsafe_eval, + extra_globals=extra_globals, + ) + else: + arg_type = annotation_value # Always parse strings using literal_eval or json if possible if isinstance(value, str): diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py index 8b907889..45cede97 100644 --- a/letta/functions/function_sets/base.py +++ b/letta/functions/function_sets/base.py @@ -383,12 +383,12 @@ def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str: # snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1]) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, f"a snippet of {path}", start_line + 1 - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." 
+ ) # return None return success_msg @@ -454,14 +454,12 @@ def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_li agent_state.memory.update_block_value(label=label, value=new_value) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, - # "a snippet of the edited file", - # max(1, insert_line - SNIPPET_LINES + 1), - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." + ) return success_msg @@ -532,12 +530,12 @@ def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> No agent_state.memory.update_block_value(label=label, value=new_memory) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, f"a snippet of {path}", start_line + 1 - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. 
" + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." + ) # return None return success_msg diff --git a/letta/helpers/message_helper.py b/letta/helpers/message_helper.py index 6250bdb3..f4e142df 100644 --- a/letta/helpers/message_helper.py +++ b/letta/helpers/message_helper.py @@ -166,3 +166,61 @@ async def _convert_message_create_to_message( batch_item_id=message_create.batch_item_id, run_id=run_id, ) + + +async def _resolve_url_to_base64(url: str) -> tuple[str, str]: + """Resolve URL to base64 data and media type.""" + if url.startswith("file://"): + parsed = urlparse(url) + file_path = unquote(parsed.path) + image_bytes = await asyncio.to_thread(lambda: open(file_path, "rb").read()) + media_type, _ = mimetypes.guess_type(file_path) + media_type = media_type or "image/jpeg" + else: + image_bytes, media_type = await _fetch_image_from_url(url) + media_type = media_type or mimetypes.guess_type(url)[0] or "image/png" + + image_data = base64.standard_b64encode(image_bytes).decode("utf-8") + return image_data, media_type + + +async def resolve_tool_return_images(func_response: str | list) -> str | list: + """Resolve URL and LettaImage sources to base64 for tool returns.""" + if isinstance(func_response, str): + return func_response + + resolved = [] + for part in func_response: + if isinstance(part, ImageContent): + if part.source.type == ImageSourceType.url: + image_data, media_type = await _resolve_url_to_base64(part.source.url) + part.source = Base64Image(media_type=media_type, data=image_data) + elif part.source.type == ImageSourceType.letta and not part.source.data: + pass + resolved.append(part) + elif isinstance(part, TextContent): + resolved.append(part) + elif isinstance(part, dict): + if part.get("type") == "image" and part.get("source", {}).get("type") == "url": + url = part["source"].get("url") + if url: + image_data, media_type = await 
_resolve_url_to_base64(url) + resolved.append( + ImageContent( + source=Base64Image( + media_type=media_type, + data=image_data, + detail=part.get("source", {}).get("detail"), + ) + ) + ) + else: + resolved.append(part) + elif part.get("type") == "text": + resolved.append(TextContent(text=part.get("text", ""))) + else: + resolved.append(part) + else: + resolved.append(part) + + return resolved diff --git a/letta/helpers/tpuf_client.py b/letta/helpers/tpuf_client.py index a550f18d..169b9969 100644 --- a/letta/helpers/tpuf_client.py +++ b/letta/helpers/tpuf_client.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone from typing import Any, Callable, List, Optional, Tuple from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE +from letta.errors import LettaInvalidArgumentError from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import MessageRole, TagMatchMode @@ -321,6 +322,7 @@ class TurbopufferClient: actor: "PydanticUser", tags: Optional[List[str]] = None, created_at: Optional[datetime] = None, + embeddings: Optional[List[List[float]]] = None, ) -> List[PydanticPassage]: """Insert passages into Turbopuffer. @@ -332,6 +334,7 @@ class TurbopufferClient: actor: User actor for embedding generation tags: Optional list of tags to attach to all passages created_at: Optional timestamp for retroactive entries (defaults to current UTC time) + embeddings: Optional pre-computed embeddings (must match 1:1 with text_chunks). If provided, skips embedding generation. 
Returns: List of PydanticPassage objects that were inserted @@ -345,9 +348,30 @@ class TurbopufferClient: logger.warning("All text chunks were empty, skipping insertion") return [] - # generate embeddings using the default config filtered_texts = [text for _, text in filtered_chunks] - embeddings = await self._generate_embeddings(filtered_texts, actor) + + # use provided embeddings only if dimensions match TPUF's expected dimension + use_provided_embeddings = False + if embeddings is not None: + if len(embeddings) != len(text_chunks): + raise LettaInvalidArgumentError( + f"embeddings length ({len(embeddings)}) must match text_chunks length ({len(text_chunks)})", + argument_name="embeddings", + ) + # check if first non-empty embedding has correct dimensions + filtered_indices = [i for i, _ in filtered_chunks] + sample_embedding = embeddings[filtered_indices[0]] if filtered_indices else None + if sample_embedding is not None and len(sample_embedding) == self.default_embedding_config.embedding_dim: + use_provided_embeddings = True + filtered_embeddings = [embeddings[i] for i, _ in filtered_chunks] + else: + logger.debug( + f"Embedding dimension mismatch (got {len(sample_embedding) if sample_embedding else 'None'}, " + f"expected {self.default_embedding_config.embedding_dim}), regenerating embeddings" + ) + + if not use_provided_embeddings: + filtered_embeddings = await self._generate_embeddings(filtered_texts, actor) namespace_name = await self._get_archive_namespace_name(archive_id) @@ -379,7 +403,7 @@ class TurbopufferClient: tags_arrays = [] # Store tags as arrays passages = [] - for (original_idx, text), embedding in zip(filtered_chunks, embeddings): + for (original_idx, text), embedding in zip(filtered_chunks, filtered_embeddings): passage_id = passage_ids[original_idx] # append to columns diff --git a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py index 
ffe6ac63..0c13a727 100644 --- a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +++ b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py @@ -39,6 +39,7 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser +from letta.server.rest_api.streaming_response import RunCancelledException from letta.server.rest_api.utils import decrement_message_uuid logger = get_logger(__name__) @@ -145,6 +146,26 @@ class SimpleAnthropicStreamingInterface: return tool_calls[0] return None + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. + """ + from letta.schemas.usage import LettaUsageStatistics + + # Anthropic: input_tokens is NON-cached only, must add cache tokens for total + actual_input_tokens = (self.input_tokens or 0) + (self.cache_read_tokens or 0) + (self.cache_creation_tokens or 0) + + return LettaUsageStatistics( + prompt_tokens=actual_input_tokens, + completion_tokens=self.output_tokens or 0, + total_tokens=actual_input_tokens + (self.output_tokens or 0), + cached_input_tokens=self.cache_read_tokens if self.cache_read_tokens else None, + cache_write_tokens=self.cache_creation_tokens if self.cache_creation_tokens else None, + reasoning_tokens=None, # Anthropic doesn't report reasoning tokens separately + ) + def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]: def _process_group( group: list[ReasoningMessage | HiddenReasoningMessage | AssistantMessage], @@ -228,10 +249,10 @@ class SimpleAnthropicStreamingInterface: prev_message_type = new_message_type # print(f"Yielding message: {message}") yield message - except 
asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback - logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc()) + logger.info("Cancelled stream attempt but overriding (%s) %s: %s", type(e).__name__, e, traceback.format_exc()) async for message in self._process_event(event, ttft_span, prev_message_type, message_index): new_message_type = message.message_type if new_message_type != prev_message_type: diff --git a/letta/interfaces/anthropic_streaming_interface.py b/letta/interfaces/anthropic_streaming_interface.py index e27d38bc..fa1fdefa 100644 --- a/letta/interfaces/anthropic_streaming_interface.py +++ b/letta/interfaces/anthropic_streaming_interface.py @@ -41,6 +41,7 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser +from letta.server.rest_api.streaming_response import RunCancelledException logger = get_logger(__name__) @@ -127,6 +128,25 @@ class AnthropicStreamingInterface: arguments = str(json.dumps(tool_input, indent=2)) return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name)) + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. 
+ """ + from letta.schemas.usage import LettaUsageStatistics + + # Anthropic: input_tokens is NON-cached only in streaming + # This interface doesn't track cache tokens, so we just use the raw values + return LettaUsageStatistics( + prompt_tokens=self.input_tokens or 0, + completion_tokens=self.output_tokens or 0, + total_tokens=(self.input_tokens or 0) + (self.output_tokens or 0), + cached_input_tokens=None, # This interface doesn't track cache tokens + cache_write_tokens=None, + reasoning_tokens=None, + ) + def _check_inner_thoughts_complete(self, combined_args: str) -> bool: """ Check if inner thoughts are complete in the current tool call arguments @@ -218,10 +238,10 @@ class AnthropicStreamingInterface: message_index += 1 prev_message_type = new_message_type yield message - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback - logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc()) + logger.info("Cancelled stream attempt but overriding (%s) %s: %s", type(e).__name__, e, traceback.format_exc()) async for message in self._process_event(event, ttft_span, prev_message_type, message_index): new_message_type = message.message_type if new_message_type != prev_message_type: @@ -636,6 +656,25 @@ class SimpleAnthropicStreamingInterface: arguments = str(json.dumps(tool_input, indent=2)) return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name)) + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. 
+ """ + from letta.schemas.usage import LettaUsageStatistics + + # Anthropic: input_tokens is NON-cached only in streaming + # This interface doesn't track cache tokens, so we just use the raw values + return LettaUsageStatistics( + prompt_tokens=self.input_tokens or 0, + completion_tokens=self.output_tokens or 0, + total_tokens=(self.input_tokens or 0) + (self.output_tokens or 0), + cached_input_tokens=None, # This interface doesn't track cache tokens + cache_write_tokens=None, + reasoning_tokens=None, + ) + def get_reasoning_content(self) -> list[TextContent | ReasoningContent | RedactedReasoningContent]: def _process_group( group: list[ReasoningMessage | HiddenReasoningMessage | AssistantMessage], @@ -726,10 +765,10 @@ class SimpleAnthropicStreamingInterface: prev_message_type = new_message_type # print(f"Yielding message: {message}") yield message - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback - logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc()) + logger.info("Cancelled stream attempt but overriding (%s) %s: %s", type(e).__name__, e, traceback.format_exc()) async for message in self._process_event(event, ttft_span, prev_message_type, message_index): new_message_type = message.message_type if new_message_type != prev_message_type: diff --git a/letta/interfaces/gemini_streaming_interface.py b/letta/interfaces/gemini_streaming_interface.py index 91fbb502..9656977c 100644 --- a/letta/interfaces/gemini_streaming_interface.py +++ b/letta/interfaces/gemini_streaming_interface.py @@ -26,6 +26,7 @@ from letta.schemas.letta_message_content import ( from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall +from letta.server.rest_api.streaming_response import RunCancelledException from letta.server.rest_api.utils import 
decrement_message_uuid from letta.utils import get_tool_call_id @@ -43,9 +44,11 @@ class SimpleGeminiStreamingInterface: requires_approval_tools: list = [], run_id: str | None = None, step_id: str | None = None, + cancellation_event: Optional["asyncio.Event"] = None, ): self.run_id = run_id self.step_id = step_id + self.cancellation_event = cancellation_event # self.messages = messages # self.tools = tools @@ -89,6 +92,9 @@ class SimpleGeminiStreamingInterface: # Raw usage from provider (for transparent logging in provider trace) self.raw_usage: dict | None = None + # Track cancellation status + self.stream_was_cancelled: bool = False + def get_content(self) -> List[ReasoningContent | TextContent | ToolCallContent]: """This is (unusually) in chunked format, instead of merged""" for content in self.content_parts: @@ -116,6 +122,27 @@ class SimpleGeminiStreamingInterface: """Return all finalized tool calls collected during this message (parallel supported).""" return list(self.collected_tool_calls) + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. + + Note: + Gemini uses `thinking_tokens` instead of `reasoning_tokens` (OpenAI o1/o3). 
+ """ + from letta.schemas.usage import LettaUsageStatistics + + return LettaUsageStatistics( + prompt_tokens=self.input_tokens or 0, + completion_tokens=self.output_tokens or 0, + total_tokens=(self.input_tokens or 0) + (self.output_tokens or 0), + # Gemini: input_tokens is already total, cached_tokens is a subset (not additive) + cached_input_tokens=self.cached_tokens, + cache_write_tokens=None, # Gemini doesn't report cache write tokens + reasoning_tokens=self.thinking_tokens, # Gemini uses thinking_tokens + ) + async def process( self, stream: AsyncIterator[GenerateContentResponse], @@ -137,10 +164,10 @@ class SimpleGeminiStreamingInterface: message_index += 1 prev_message_type = new_message_type yield message - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback - logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc()) + logger.info("Cancelled stream attempt but overriding (%s) %s: %s", type(e).__name__, e, traceback.format_exc()) async for message in self._process_event(event, ttft_span, prev_message_type, message_index): new_message_type = message.message_type if new_message_type != prev_message_type: @@ -164,7 +191,11 @@ class SimpleGeminiStreamingInterface: yield LettaStopReason(stop_reason=StopReasonType.error) raise e finally: - logger.info("GeminiStreamingInterface: Stream processing complete.") + # Check if cancellation was signaled via shared event + if self.cancellation_event and self.cancellation_event.is_set(): + self.stream_was_cancelled = True + + logger.info(f"GeminiStreamingInterface: Stream processing complete. 
stream was cancelled: {self.stream_was_cancelled}") async def _process_event( self, diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py index 36e3dfa6..ca3602df 100644 --- a/letta/interfaces/openai_streaming_interface.py +++ b/letta/interfaces/openai_streaming_interface.py @@ -54,6 +54,7 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall from letta.server.rest_api.json_parser import OptimisticJSONParser +from letta.server.rest_api.streaming_response import RunCancelledException from letta.server.rest_api.utils import decrement_message_uuid from letta.services.context_window_calculator.token_counter import create_token_counter from letta.streaming_utils import ( @@ -82,6 +83,7 @@ class OpenAIStreamingInterface: requires_approval_tools: list = [], run_id: str | None = None, step_id: str | None = None, + cancellation_event: Optional["asyncio.Event"] = None, ): self.use_assistant_message = use_assistant_message @@ -93,6 +95,7 @@ class OpenAIStreamingInterface: self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg self.run_id = run_id self.step_id = step_id + self.cancellation_event = cancellation_event self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser() self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=put_inner_thoughts_in_kwarg) @@ -191,6 +194,28 @@ class OpenAIStreamingInterface: function=FunctionCall(arguments=self._get_current_function_arguments(), name=function_name), ) + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. 
+ """ + from letta.schemas.usage import LettaUsageStatistics + + # Use actual tokens if available, otherwise fall back to estimated + input_tokens = self.input_tokens if self.input_tokens else self.fallback_input_tokens + output_tokens = self.output_tokens if self.output_tokens else self.fallback_output_tokens + + return LettaUsageStatistics( + prompt_tokens=input_tokens or 0, + completion_tokens=output_tokens or 0, + total_tokens=(input_tokens or 0) + (output_tokens or 0), + # OpenAI: input_tokens is already total, cached_tokens is a subset (not additive) + cached_input_tokens=None, # This interface doesn't track cache tokens + cache_write_tokens=None, + reasoning_tokens=None, # This interface doesn't track reasoning tokens + ) + async def process( self, stream: AsyncStream[ChatCompletionChunk], @@ -226,14 +251,15 @@ class OpenAIStreamingInterface: message_index += 1 prev_message_type = new_message_type yield message - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback self.stream_was_cancelled = True logger.warning( - "Stream was cancelled (CancelledError). Attempting to process current event. " + "Stream was cancelled (%s). Attempting to process current event. " f"Events received so far: {self.total_events_received}, last event: {self.last_event_type}. " f"Error: %s, trace: %s", + type(e).__name__, e, traceback.format_exc(), ) @@ -267,6 +293,10 @@ class OpenAIStreamingInterface: yield LettaStopReason(stop_reason=StopReasonType.error) raise e finally: + # Check if cancellation was signaled via shared event + if self.cancellation_event and self.cancellation_event.is_set(): + self.stream_was_cancelled = True + logger.info( f"OpenAIStreamingInterface: Stream processing complete. 
" f"Received {self.total_events_received} events, " @@ -561,9 +591,11 @@ class SimpleOpenAIStreamingInterface: model: str = None, run_id: str | None = None, step_id: str | None = None, + cancellation_event: Optional["asyncio.Event"] = None, ): self.run_id = run_id self.step_id = step_id + self.cancellation_event = cancellation_event # Premake IDs for database writes self.letta_message_id = Message.generate_id() @@ -662,6 +694,28 @@ class SimpleOpenAIStreamingInterface: raise ValueError("No tool calls available") return calls[0] + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. + """ + from letta.schemas.usage import LettaUsageStatistics + + # Use actual tokens if available, otherwise fall back to estimated + input_tokens = self.input_tokens if self.input_tokens else self.fallback_input_tokens + output_tokens = self.output_tokens if self.output_tokens else self.fallback_output_tokens + + return LettaUsageStatistics( + prompt_tokens=input_tokens or 0, + completion_tokens=output_tokens or 0, + total_tokens=(input_tokens or 0) + (output_tokens or 0), + # OpenAI: input_tokens is already total, cached_tokens is a subset (not additive) + cached_input_tokens=self.cached_tokens, + cache_write_tokens=None, # OpenAI doesn't have cache write tokens + reasoning_tokens=self.reasoning_tokens, + ) + async def process( self, stream: AsyncStream[ChatCompletionChunk], @@ -715,14 +769,15 @@ class SimpleOpenAIStreamingInterface: message_index += 1 prev_message_type = new_message_type yield message - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback self.stream_was_cancelled = True logger.warning( - "Stream was cancelled (CancelledError). Attempting to process current event. " + "Stream was cancelled (%s). Attempting to process current event. 
" f"Events received so far: {self.total_events_received}, last event: {self.last_event_type}. " f"Error: %s, trace: %s", + type(e).__name__, e, traceback.format_exc(), ) @@ -764,6 +819,10 @@ class SimpleOpenAIStreamingInterface: yield LettaStopReason(stop_reason=StopReasonType.error) raise e finally: + # Check if cancellation was signaled via shared event + if self.cancellation_event and self.cancellation_event.is_set(): + self.stream_was_cancelled = True + logger.info( f"SimpleOpenAIStreamingInterface: Stream processing complete. " f"Received {self.total_events_received} events, " @@ -932,6 +991,7 @@ class SimpleOpenAIResponsesStreamingInterface: model: str = None, run_id: str | None = None, step_id: str | None = None, + cancellation_event: Optional["asyncio.Event"] = None, ): self.is_openai_proxy = is_openai_proxy self.messages = messages @@ -946,6 +1006,7 @@ class SimpleOpenAIResponsesStreamingInterface: self.message_id = None self.run_id = run_id self.step_id = step_id + self.cancellation_event = cancellation_event # Premake IDs for database writes self.letta_message_id = Message.generate_id() @@ -1063,6 +1124,24 @@ class SimpleOpenAIResponsesStreamingInterface: raise ValueError("No tool calls available") return calls[0] + def get_usage_statistics(self) -> "LettaUsageStatistics": + """Extract usage statistics from accumulated streaming data. + + Returns: + LettaUsageStatistics with token counts from the stream. 
+ """ + from letta.schemas.usage import LettaUsageStatistics + + return LettaUsageStatistics( + prompt_tokens=self.input_tokens or 0, + completion_tokens=self.output_tokens or 0, + total_tokens=(self.input_tokens or 0) + (self.output_tokens or 0), + # OpenAI Responses API: input_tokens is already total + cached_input_tokens=self.cached_tokens, + cache_write_tokens=None, # OpenAI doesn't have cache write tokens + reasoning_tokens=self.reasoning_tokens, + ) + async def process( self, stream: AsyncStream[ResponseStreamEvent], @@ -1102,14 +1181,15 @@ class SimpleOpenAIResponsesStreamingInterface: ) # Continue to next event rather than killing the stream continue - except asyncio.CancelledError as e: + except (asyncio.CancelledError, RunCancelledException) as e: import traceback self.stream_was_cancelled = True logger.warning( - "Stream was cancelled (CancelledError). Attempting to process current event. " + "Stream was cancelled (%s). Attempting to process current event. " f"Events received so far: {self.total_events_received}, last event: {self.last_event_type}. " f"Error: %s, trace: %s", + type(e).__name__, e, traceback.format_exc(), ) @@ -1136,6 +1216,10 @@ class SimpleOpenAIResponsesStreamingInterface: yield LettaStopReason(stop_reason=StopReasonType.error) raise e finally: + # Check if cancellation was signaled via shared event + if self.cancellation_event and self.cancellation_event.is_set(): + self.stream_was_cancelled = True + logger.info( f"ResponsesAPI Stream processing complete. 
" f"Received {self.total_events_received} events, " diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 8461167a..8acd65dd 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -48,6 +48,7 @@ from letta.schemas.openai.chat_completion_response import ( UsageStatistics, ) from letta.schemas.response_format import JsonSchemaResponseFormat +from letta.schemas.usage import LettaUsageStatistics from letta.settings import model_settings DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence." @@ -777,6 +778,18 @@ class AnthropicClient(LLMClientBase): if not block.get("text", "").strip(): block["text"] = "." + # Strip trailing whitespace from final assistant message + # Anthropic API rejects messages where "final assistant content cannot end with trailing whitespace" + if is_final_assistant: + if isinstance(content, str): + msg["content"] = content.rstrip() + elif isinstance(content, list) and len(content) > 0: + # Find and strip trailing whitespace from the last text block + for block in reversed(content): + if isinstance(block, dict) and block.get("type") == "text": + block["text"] = block.get("text", "").rstrip() + break + try: count_params = { "model": model or "claude-3-7-sonnet-20250219", @@ -976,6 +989,35 @@ class AnthropicClient(LLMClientBase): return super().handle_llm_error(e) + def extract_usage_statistics(self, response_data: dict | None, llm_config: LLMConfig) -> LettaUsageStatistics: + """Extract usage statistics from Anthropic response and return as LettaUsageStatistics.""" + if not response_data: + return LettaUsageStatistics() + + response = AnthropicMessage(**response_data) + prompt_tokens = response.usage.input_tokens + completion_tokens = response.usage.output_tokens + + # Extract cache data if available (None means not reported, 0 means reported as 0) + cache_read_tokens = None + cache_creation_tokens = None + if hasattr(response.usage, "cache_read_input_tokens"): + 
cache_read_tokens = response.usage.cache_read_input_tokens + if hasattr(response.usage, "cache_creation_input_tokens"): + cache_creation_tokens = response.usage.cache_creation_input_tokens + + # Per Anthropic docs: "Total input tokens in a request is the summation of + # input_tokens, cache_creation_input_tokens, and cache_read_input_tokens." + actual_input_tokens = prompt_tokens + (cache_read_tokens or 0) + (cache_creation_tokens or 0) + + return LettaUsageStatistics( + prompt_tokens=actual_input_tokens, + completion_tokens=completion_tokens, + total_tokens=actual_input_tokens + completion_tokens, + cached_input_tokens=cache_read_tokens, + cache_write_tokens=cache_creation_tokens, + ) + # TODO: Input messages doesn't get used here # TODO: Clean up this interface @trace_method @@ -1020,10 +1062,13 @@ class AnthropicClient(LLMClientBase): } """ response = AnthropicMessage(**response_data) - prompt_tokens = response.usage.input_tokens - completion_tokens = response.usage.output_tokens finish_reason = remap_finish_reason(str(response.stop_reason)) + # Extract usage via centralized method + from letta.schemas.enums import ProviderType + + usage_stats = self.extract_usage_statistics(response_data, llm_config).to_usage(ProviderType.anthropic) + content = None reasoning_content = None reasoning_content_signature = None @@ -1088,35 +1133,12 @@ class AnthropicClient(LLMClientBase): ), ) - # Build prompt tokens details with cache data if available - prompt_tokens_details = None - cache_read_tokens = 0 - cache_creation_tokens = 0 - if hasattr(response.usage, "cache_read_input_tokens") or hasattr(response.usage, "cache_creation_input_tokens"): - from letta.schemas.openai.chat_completion_response import UsageStatisticsPromptTokenDetails - - cache_read_tokens = getattr(response.usage, "cache_read_input_tokens", 0) or 0 - cache_creation_tokens = getattr(response.usage, "cache_creation_input_tokens", 0) or 0 - prompt_tokens_details = UsageStatisticsPromptTokenDetails( - 
cache_read_tokens=cache_read_tokens, - cache_creation_tokens=cache_creation_tokens, - ) - - # Per Anthropic docs: "Total input tokens in a request is the summation of - # input_tokens, cache_creation_input_tokens, and cache_read_input_tokens." - actual_input_tokens = prompt_tokens + cache_read_tokens + cache_creation_tokens - chat_completion_response = ChatCompletionResponse( id=response.id, choices=[choice], created=get_utc_time_int(), model=response.model, - usage=UsageStatistics( - prompt_tokens=actual_input_tokens, - completion_tokens=completion_tokens, - total_tokens=actual_input_tokens + completion_tokens, - prompt_tokens_details=prompt_tokens_details, - ), + usage=usage_stats, ) if llm_config.put_inner_thoughts_in_kwargs: chat_completion_response = unpack_all_inner_thoughts_from_kwargs( diff --git a/letta/llm_api/chatgpt_oauth_client.py b/letta/llm_api/chatgpt_oauth_client.py index 7bf991d9..86854c06 100644 --- a/letta/llm_api/chatgpt_oauth_client.py +++ b/letta/llm_api/chatgpt_oauth_client.py @@ -54,6 +54,7 @@ from letta.schemas.openai.chat_completion_response import ( UsageStatistics, ) from letta.schemas.providers.chatgpt_oauth import ChatGPTOAuthCredentials, ChatGPTOAuthProvider +from letta.schemas.usage import LettaUsageStatistics logger = get_logger(__name__) @@ -511,6 +512,25 @@ class ChatGPTOAuthClient(LLMClientBase): # Response should already be in ChatCompletion format after transformation return ChatCompletionResponse(**response_data) + def extract_usage_statistics(self, response_data: dict | None, llm_config: LLMConfig) -> LettaUsageStatistics: + """Extract usage statistics from ChatGPT OAuth response and return as LettaUsageStatistics.""" + if not response_data: + return LettaUsageStatistics() + + usage = response_data.get("usage") + if not usage: + return LettaUsageStatistics() + + prompt_tokens = usage.get("prompt_tokens") or 0 + completion_tokens = usage.get("completion_tokens") or 0 + total_tokens = usage.get("total_tokens") or 
(prompt_tokens + completion_tokens) + + return LettaUsageStatistics( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + @trace_method async def stream_async( self, diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 0bd13ed0..b5bac794 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -39,6 +39,7 @@ from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage from letta.schemas.openai.chat_completion_request import Tool, Tool as OpenAITool from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics +from letta.schemas.usage import LettaUsageStatistics from letta.settings import model_settings, settings from letta.utils import get_tool_call_id @@ -415,6 +416,34 @@ class GoogleVertexClient(LLMClientBase): return request_data + def extract_usage_statistics(self, response_data: dict | None, llm_config: LLMConfig) -> LettaUsageStatistics: + """Extract usage statistics from Gemini response and return as LettaUsageStatistics.""" + if not response_data: + return LettaUsageStatistics() + + response = GenerateContentResponse(**response_data) + if not response.usage_metadata: + return LettaUsageStatistics() + + cached_tokens = None + if ( + hasattr(response.usage_metadata, "cached_content_token_count") + and response.usage_metadata.cached_content_token_count is not None + ): + cached_tokens = response.usage_metadata.cached_content_token_count + + reasoning_tokens = None + if hasattr(response.usage_metadata, "thoughts_token_count") and response.usage_metadata.thoughts_token_count is not None: + reasoning_tokens = response.usage_metadata.thoughts_token_count + + return LettaUsageStatistics( + prompt_tokens=response.usage_metadata.prompt_token_count or 0, + 
completion_tokens=response.usage_metadata.candidates_token_count or 0, + total_tokens=response.usage_metadata.total_token_count or 0, + cached_input_tokens=cached_tokens, + reasoning_tokens=reasoning_tokens, + ) + @trace_method async def convert_response_to_chat_completion( self, @@ -642,36 +671,10 @@ class GoogleVertexClient(LLMClientBase): # "totalTokenCount": 36 # } if response.usage_metadata: - # Extract cache token data if available (Gemini uses cached_content_token_count) - # Use `is not None` to capture 0 values (meaning "provider reported 0 cached tokens") - prompt_tokens_details = None - if ( - hasattr(response.usage_metadata, "cached_content_token_count") - and response.usage_metadata.cached_content_token_count is not None - ): - from letta.schemas.openai.chat_completion_response import UsageStatisticsPromptTokenDetails + # Extract usage via centralized method + from letta.schemas.enums import ProviderType - prompt_tokens_details = UsageStatisticsPromptTokenDetails( - cached_tokens=response.usage_metadata.cached_content_token_count, - ) - - # Extract thinking/reasoning token data if available (Gemini uses thoughts_token_count) - # Use `is not None` to capture 0 values (meaning "provider reported 0 reasoning tokens") - completion_tokens_details = None - if hasattr(response.usage_metadata, "thoughts_token_count") and response.usage_metadata.thoughts_token_count is not None: - from letta.schemas.openai.chat_completion_response import UsageStatisticsCompletionTokenDetails - - completion_tokens_details = UsageStatisticsCompletionTokenDetails( - reasoning_tokens=response.usage_metadata.thoughts_token_count, - ) - - usage = UsageStatistics( - prompt_tokens=response.usage_metadata.prompt_token_count, - completion_tokens=response.usage_metadata.candidates_token_count, - total_tokens=response.usage_metadata.total_token_count, - prompt_tokens_details=prompt_tokens_details, - completion_tokens_details=completion_tokens_details, - ) + usage = 
self.extract_usage_statistics(response_data, llm_config).to_usage(ProviderType.google_ai) else: # Count it ourselves using the Gemini token counting API assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required" diff --git a/letta/llm_api/groq_client.py b/letta/llm_api/groq_client.py index 8a71ab28..5909f4d5 100644 --- a/letta/llm_api/groq_client.py +++ b/letta/llm_api/groq_client.py @@ -43,6 +43,14 @@ class GroqClient(OpenAIClient): data["logprobs"] = False data["n"] = 1 + # for openai.BadRequestError: Error code: 400 - {'error': {'message': "'messages.2' : for 'role:assistant' the following must be satisfied[('messages.2' : property 'reasoning_content' is unsupported)]", 'type': 'invalid_request_error'}} + if "messages" in data: + for message in data["messages"]: + if "reasoning_content" in message: + del message["reasoning_content"] + if "reasoning_content_signature" in message: + del message["reasoning_content_signature"] + return data @trace_method diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 0bee61a6..280b3eb0 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -167,8 +167,8 @@ def create( printd("unsetting function_call because functions is None") function_call = None - # openai - if llm_config.model_endpoint_type == "openai": + # openai and openrouter (OpenAI-compatible) + if llm_config.model_endpoint_type in ["openai", "openrouter"]: if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1": # only is a problem if we are *not* using an openai proxy raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"]) diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py index 805e4038..18159743 100644 --- a/letta/llm_api/llm_client.py +++ b/letta/llm_api/llm_client.py @@ -93,6 +93,21 @@ class LLMClient: 
put_inner_thoughts_first=put_inner_thoughts_first, actor=actor, ) + case ProviderType.minimax: + from letta.llm_api.minimax_client import MiniMaxClient + + return MiniMaxClient( + put_inner_thoughts_first=put_inner_thoughts_first, + actor=actor, + ) + case ProviderType.openrouter: + # OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly + from letta.llm_api.openai_client import OpenAIClient + + return OpenAIClient( + put_inner_thoughts_first=put_inner_thoughts_first, + actor=actor, + ) case ProviderType.deepseek: from letta.llm_api.deepseek_client import DeepseekClient diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py index 697e0961..754a19e8 100644 --- a/letta/llm_api/llm_client_base.py +++ b/letta/llm_api/llm_client_base.py @@ -15,6 +15,7 @@ from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import ChatCompletionResponse from letta.schemas.provider_trace import ProviderTrace +from letta.schemas.usage import LettaUsageStatistics from letta.services.telemetry_manager import TelemetryManager from letta.settings import settings @@ -43,6 +44,10 @@ class LLMClientBase: self._telemetry_run_id: Optional[str] = None self._telemetry_step_id: Optional[str] = None self._telemetry_call_type: Optional[str] = None + self._telemetry_org_id: Optional[str] = None + self._telemetry_user_id: Optional[str] = None + self._telemetry_compaction_settings: Optional[Dict] = None + self._telemetry_llm_config: Optional[Dict] = None def set_telemetry_context( self, @@ -52,6 +57,10 @@ class LLMClientBase: run_id: Optional[str] = None, step_id: Optional[str] = None, call_type: Optional[str] = None, + org_id: Optional[str] = None, + user_id: Optional[str] = None, + compaction_settings: Optional[Dict] = None, + llm_config: Optional[Dict] = None, ) -> None: """Set telemetry context for provider trace logging.""" self._telemetry_manager = 
telemetry_manager @@ -60,6 +69,14 @@ class LLMClientBase: self._telemetry_run_id = run_id self._telemetry_step_id = step_id self._telemetry_call_type = call_type + self._telemetry_org_id = org_id + self._telemetry_user_id = user_id + self._telemetry_compaction_settings = compaction_settings + self._telemetry_llm_config = llm_config + + def extract_usage_statistics(self, response_data: Optional[dict], llm_config: LLMConfig) -> LettaUsageStatistics: + """Provider-specific usage parsing hook (override in subclasses). Returns LettaUsageStatistics.""" + return LettaUsageStatistics() async def request_async_with_telemetry(self, request_data: dict, llm_config: LLMConfig) -> dict: """Wrapper around request_async that logs telemetry for all requests including errors. @@ -96,6 +113,10 @@ class LLMClientBase: agent_tags=self._telemetry_agent_tags, run_id=self._telemetry_run_id, call_type=self._telemetry_call_type, + org_id=self._telemetry_org_id, + user_id=self._telemetry_user_id, + compaction_settings=self._telemetry_compaction_settings, + llm_config=self._telemetry_llm_config, ), ) except Exception as e: @@ -137,6 +158,10 @@ class LLMClientBase: agent_tags=self._telemetry_agent_tags, run_id=self._telemetry_run_id, call_type=self._telemetry_call_type, + org_id=self._telemetry_org_id, + user_id=self._telemetry_user_id, + compaction_settings=self._telemetry_compaction_settings, + llm_config=self._telemetry_llm_config, ), ) except Exception as e: diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py new file mode 100644 index 00000000..6029f460 --- /dev/null +++ b/letta/llm_api/minimax_client.py @@ -0,0 +1,175 @@ +from typing import List, Optional, Union + +import anthropic +from anthropic import AsyncStream +from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent + +from letta.llm_api.anthropic_client import AnthropicClient +from letta.log import get_logger +from letta.otel.tracing import trace_method +from letta.schemas.agent import 
AgentType +from letta.schemas.llm_config import LLMConfig +from letta.schemas.message import Message as PydanticMessage +from letta.settings import model_settings + +logger = get_logger(__name__) + + +class MiniMaxClient(AnthropicClient): + """ + MiniMax LLM client using Anthropic-compatible API. + + Uses the beta messages API to ensure compatibility with Anthropic streaming interfaces. + Temperature must be in range (0.0, 1.0]. + Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc. + + Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api + + Note: We override client creation to always use llm_config.model_endpoint as base_url + (required for BYOK where provider_name is user's custom name, not "minimax"). + We also override request methods to avoid passing Anthropic-specific beta headers. + """ + + @trace_method + def _get_anthropic_client( + self, llm_config: LLMConfig, async_client: bool = False + ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: + """Create Anthropic client configured for MiniMax API.""" + api_key, _, _ = self.get_byok_overrides(llm_config) + + if not api_key: + api_key = model_settings.minimax_api_key + + # Always use model_endpoint for base_url (works for both base and BYOK providers) + base_url = llm_config.model_endpoint + + if async_client: + return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + + @trace_method + async def _get_anthropic_client_async( + self, llm_config: LLMConfig, async_client: bool = False + ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: + """Create Anthropic client configured for MiniMax API (async version).""" + api_key, _, _ = await self.get_byok_overrides_async(llm_config) + + if not api_key: + api_key = model_settings.minimax_api_key + + # Always use model_endpoint 
for base_url (works for both base and BYOK providers) + base_url = llm_config.model_endpoint + + if async_client: + return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + + @trace_method + def request(self, request_data: dict, llm_config: LLMConfig) -> dict: + """ + Synchronous request to MiniMax API. + + Uses beta messages API for compatibility with Anthropic streaming interfaces. + """ + client = self._get_anthropic_client(llm_config, async_client=False) + + response: BetaMessage = client.beta.messages.create(**request_data) + return response.model_dump() + + @trace_method + async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict: + """ + Asynchronous request to MiniMax API. + + Uses beta messages API for compatibility with Anthropic streaming interfaces. + """ + client = await self._get_anthropic_client_async(llm_config, async_client=True) + + try: + response: BetaMessage = await client.beta.messages.create(**request_data) + return response.model_dump() + except ValueError as e: + # Handle streaming fallback if needed (similar to Anthropic client) + if "streaming is required" in str(e).lower(): + logger.warning( + "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s", + str(e), + ) + return await self._request_via_streaming(request_data, llm_config, betas=[]) + raise + + @trace_method + async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]: + """ + Asynchronous streaming request to MiniMax API. + + Uses beta messages API for compatibility with Anthropic streaming interfaces. 
+ """ + client = await self._get_anthropic_client_async(llm_config, async_client=True) + request_data["stream"] = True + + try: + return await client.beta.messages.create(**request_data) + except Exception as e: + logger.error(f"Error streaming MiniMax request: {e}") + raise e + + @trace_method + def build_request_data( + self, + agent_type: AgentType, + messages: List[PydanticMessage], + llm_config: LLMConfig, + tools: Optional[List[dict]] = None, + force_tool_call: Optional[str] = None, + requires_subsequent_tool_call: bool = False, + tool_return_truncation_chars: Optional[int] = None, + ) -> dict: + """ + Build request data for MiniMax API. + + Inherits most logic from AnthropicClient, with MiniMax-specific adjustments: + - Temperature must be in range (0.0, 1.0] + """ + data = super().build_request_data( + agent_type, + messages, + llm_config, + tools, + force_tool_call, + requires_subsequent_tool_call, + tool_return_truncation_chars, + ) + + # MiniMax temperature range is (0.0, 1.0], recommended value: 1 + if data.get("temperature") is not None: + temp = data["temperature"] + if temp <= 0: + data["temperature"] = 0.01 # Minimum valid value (exclusive of 0) + logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 0.01.") + elif temp > 1.0: + data["temperature"] = 1.0 # Maximum valid value + logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 1.0.") + + # MiniMax ignores these Anthropic-specific parameters, but we can remove them + # to avoid potential issues (they won't cause errors, just ignored) + # Note: We don't remove them since MiniMax silently ignores them + + return data + + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + """ + All MiniMax M2.x models support native interleaved thinking. + + Unlike Anthropic where only certain models (Claude 3.7+) support extended thinking, + all MiniMax models natively support thinking blocks without beta headers. 
+ """ + return True + + def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool: + """MiniMax models support all tool choice modes.""" + return False + + def supports_structured_output(self, llm_config: LLMConfig) -> bool: + """MiniMax doesn't currently advertise structured output support.""" + return False diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 87324451..ca26c768 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -60,6 +60,7 @@ from letta.schemas.openai.chat_completion_response import ( ) from letta.schemas.openai.responses_request import ResponsesRequest from letta.schemas.response_format import JsonSchemaResponseFormat +from letta.schemas.usage import LettaUsageStatistics from letta.settings import model_settings logger = get_logger(__name__) @@ -169,6 +170,7 @@ def supports_content_none(llm_config: LLMConfig) -> bool: class OpenAIClient(LLMClientBase): def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict: api_key, _, _ = self.get_byok_overrides(llm_config) + has_byok_key = api_key is not None # Track if we got a BYOK key # Default to global OpenAI key when no BYOK override if not api_key: @@ -181,9 +183,11 @@ class OpenAIClient(LLMClientBase): llm_config.provider_name == "openrouter" ) if is_openrouter: - or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY") - if or_key: - kwargs["api_key"] = or_key + # Only use prod OpenRouter key if no BYOK key was provided + if not has_byok_key: + or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY") + if or_key: + kwargs["api_key"] = or_key # Attach optional headers if provided headers = {} if model_settings.openrouter_referer: @@ -207,6 +211,7 @@ class OpenAIClient(LLMClientBase): async def _prepare_client_kwargs_async(self, llm_config: LLMConfig) -> dict: api_key, _, _ = await self.get_byok_overrides_async(llm_config) + has_byok_key = api_key is not None # Track if we 
got a BYOK key if not api_key: api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY") @@ -216,9 +221,11 @@ class OpenAIClient(LLMClientBase): llm_config.provider_name == "openrouter" ) if is_openrouter: - or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY") - if or_key: - kwargs["api_key"] = or_key + # Only use prod OpenRouter key if no BYOK key was provided + if not has_byok_key: + or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY") + if or_key: + kwargs["api_key"] = or_key headers = {} if model_settings.openrouter_referer: headers["HTTP-Referer"] = model_settings.openrouter_referer @@ -591,6 +598,66 @@ class OpenAIClient(LLMClientBase): def is_reasoning_model(self, llm_config: LLMConfig) -> bool: return is_openai_reasoning_model(llm_config.model) + def extract_usage_statistics(self, response_data: dict | None, llm_config: LLMConfig) -> LettaUsageStatistics: + """Extract usage statistics from OpenAI response and return as LettaUsageStatistics.""" + if not response_data: + return LettaUsageStatistics() + + # Handle Responses API format (used by reasoning models like o1/o3) + if response_data.get("object") == "response": + usage = response_data.get("usage", {}) or {} + prompt_tokens = usage.get("input_tokens") or 0 + completion_tokens = usage.get("output_tokens") or 0 + total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens) + + input_details = usage.get("input_tokens_details", {}) or {} + cached_tokens = input_details.get("cached_tokens") + + output_details = usage.get("output_tokens_details", {}) or {} + reasoning_tokens = output_details.get("reasoning_tokens") + + return LettaUsageStatistics( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + cached_input_tokens=cached_tokens, + reasoning_tokens=reasoning_tokens, + ) + + # Handle standard Chat Completions API format using pydantic models + from 
openai.types.chat import ChatCompletion + + try: + completion = ChatCompletion.model_validate(response_data) + except Exception: + return LettaUsageStatistics() + + if not completion.usage: + return LettaUsageStatistics() + + usage = completion.usage + prompt_tokens = usage.prompt_tokens or 0 + completion_tokens = usage.completion_tokens or 0 + total_tokens = usage.total_tokens or (prompt_tokens + completion_tokens) + + # Extract cached tokens from prompt_tokens_details + cached_tokens = None + if usage.prompt_tokens_details: + cached_tokens = usage.prompt_tokens_details.cached_tokens + + # Extract reasoning tokens from completion_tokens_details + reasoning_tokens = None + if usage.completion_tokens_details: + reasoning_tokens = usage.completion_tokens_details.reasoning_tokens + + return LettaUsageStatistics( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + cached_input_tokens=cached_tokens, + reasoning_tokens=reasoning_tokens, + ) + @trace_method async def convert_response_to_chat_completion( self, @@ -607,30 +674,10 @@ class OpenAIClient(LLMClientBase): # See example payload in tests/integration_test_send_message_v2.py model = response_data.get("model") - # Extract usage - usage = response_data.get("usage", {}) or {} - prompt_tokens = usage.get("input_tokens") or 0 - completion_tokens = usage.get("output_tokens") or 0 - total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens) + # Extract usage via centralized method + from letta.schemas.enums import ProviderType - # Extract detailed token breakdowns (Responses API uses input_tokens_details/output_tokens_details) - prompt_tokens_details = None - input_details = usage.get("input_tokens_details", {}) or {} - if input_details.get("cached_tokens"): - from letta.schemas.openai.chat_completion_response import UsageStatisticsPromptTokenDetails - - prompt_tokens_details = UsageStatisticsPromptTokenDetails( - 
cached_tokens=input_details.get("cached_tokens") or 0, - ) - - completion_tokens_details = None - output_details = usage.get("output_tokens_details", {}) or {} - if output_details.get("reasoning_tokens"): - from letta.schemas.openai.chat_completion_response import UsageStatisticsCompletionTokenDetails - - completion_tokens_details = UsageStatisticsCompletionTokenDetails( - reasoning_tokens=output_details.get("reasoning_tokens") or 0, - ) + usage_stats = self.extract_usage_statistics(response_data, llm_config).to_usage(ProviderType.openai) # Extract assistant message text from the outputs list outputs = response_data.get("output") or [] @@ -698,13 +745,7 @@ class OpenAIClient(LLMClientBase): choices=[choice], created=int(response_data.get("created_at") or 0), model=model or (llm_config.model if hasattr(llm_config, "model") else None), - usage=UsageStatistics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - prompt_tokens_details=prompt_tokens_details, - completion_tokens_details=completion_tokens_details, - ), + usage=usage_stats, ) return chat_completion_response diff --git a/letta/model_specs/__init__.py b/letta/model_specs/__init__.py new file mode 100644 index 00000000..c5fc8c4a --- /dev/null +++ b/letta/model_specs/__init__.py @@ -0,0 +1 @@ +"""Model specification utilities for Letta.""" diff --git a/letta/model_specs/litellm_model_specs.py b/letta/model_specs/litellm_model_specs.py new file mode 100644 index 00000000..f84485e1 --- /dev/null +++ b/letta/model_specs/litellm_model_specs.py @@ -0,0 +1,120 @@ +""" +Utility functions for working with litellm model specifications. + +This module provides access to model specifications from the litellm model_prices_and_context_window.json file. 
+The data is synced from: https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json +""" + +import json +import os +from typing import Optional + +import aiofiles +from async_lru import alru_cache + +from letta.log import get_logger + +logger = get_logger(__name__) + +# Path to the litellm model specs JSON file +MODEL_SPECS_PATH = os.path.join(os.path.dirname(__file__), "model_prices_and_context_window.json") + + +@alru_cache(maxsize=1) +async def load_model_specs() -> dict: + """Load the litellm model specifications from the JSON file. + + Returns: + dict: The model specifications data + + Raises: + FileNotFoundError: If the model specs file is not found + json.JSONDecodeError: If the file is not valid JSON + """ + if not os.path.exists(MODEL_SPECS_PATH): + logger.warning(f"Model specs file not found at {MODEL_SPECS_PATH}") + return {} + + try: + async with aiofiles.open(MODEL_SPECS_PATH, "r") as f: + content = await f.read() + return json.loads(content) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse model specs JSON: {e}") + return {} + + +async def get_model_spec(model_name: str) -> Optional[dict]: + """Get the specification for a specific model. + + Args: + model_name: The name of the model (e.g., "gpt-4o", "gpt-4o-mini") + + Returns: + Optional[dict]: The model specification if found, None otherwise + """ + specs = await load_model_specs() + return specs.get(model_name) + + +async def get_max_input_tokens(model_name: str) -> Optional[int]: + """Get the max input tokens for a model. + + Args: + model_name: The name of the model + + Returns: + Optional[int]: The max input tokens if found, None otherwise + """ + spec = await get_model_spec(model_name) + if not spec: + return None + + return spec.get("max_input_tokens") + + +async def get_max_output_tokens(model_name: str) -> Optional[int]: + """Get the max output tokens for a model. 
+ + Args: + model_name: The name of the model + + Returns: + Optional[int]: The max output tokens if found, None otherwise + """ + spec = await get_model_spec(model_name) + if not spec: + return None + + # Try max_output_tokens first, fall back to max_tokens + return spec.get("max_output_tokens") or spec.get("max_tokens") + + +async def get_context_window(model_name: str) -> Optional[int]: + """Get the context window size for a model. + + For most models, this is the max_input_tokens. + + Args: + model_name: The name of the model + + Returns: + Optional[int]: The context window size if found, None otherwise + """ + return await get_max_input_tokens(model_name) + + +async def get_litellm_provider(model_name: str) -> Optional[str]: + """Get the litellm provider for a model. + + Args: + model_name: The name of the model + + Returns: + Optional[str]: The provider name if found, None otherwise + """ + spec = await get_model_spec(model_name) + if not spec: + return None + + return spec.get("litellm_provider") diff --git a/letta/model_specs/model_prices_and_context_window.json b/letta/model_specs/model_prices_and_context_window.json new file mode 100644 index 00000000..e4081310 --- /dev/null +++ b/letta/model_specs/model_prices_and_context_window.json @@ -0,0 +1,31925 @@ +{ + "sample_spec": { + "code_interpreter_cost_per_session": 0.0, + "computer_use_input_cost_per_1k_tokens": 0.0, + "computer_use_output_cost_per_1k_tokens": 0.0, + "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", + "file_search_cost_per_1k_calls": 0.0, + "file_search_cost_per_gb_per_day": 0.0, + "input_cost_per_audio_token": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "one of https://docs.litellm.ai/docs/providers", + "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", + "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", + "max_tokens": "LEGACY parameter. 
set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", + "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank, search", + "output_cost_per_reasoning_token": 0.0, + "output_cost_per_token": 0.0, + "search_context_cost_per_query": { + "search_context_size_high": 0.0, + "search_context_size_low": 0.0, + "search_context_size_medium": 0.0 + }, + "supported_regions": [ + "global", + "us-west-2", + "eu-west-1", + "ap-southeast-1", + "ap-northeast-1" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "vector_store_cost_per_gb_per_day": 0.0 + }, + "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06 + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + "1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 1.9e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "256-x-256/dall-e-2": { + "input_cost_per_pixel": 2.4414e-7, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + 
"max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.018 + }, + "512-x-512/dall-e-2": { + "input_cost_per_pixel": 6.86e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.036 + }, + "ai21.j2-mid-v1": { + "input_cost_per_token": 1.25e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.25e-5 + }, + "ai21.j2-ultra-v1": { + "input_cost_per_token": 1.88e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.88e-5 + }, + "ai21.jamba-1-5-large-v1:0": { + "input_cost_per_token": 2e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6 + }, + "ai21.jamba-1-5-mini-v1:0": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7 + }, + "ai21.jamba-instruct-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_system_messages": true + }, + "aiml/dall-e-2": { + "litellm_provider": "aiml", + "metadata": { + "notes": "DALL-E 2 via AI/ML API - Reliable text-to-image generation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.021, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + 
"aiml/dall-e-3": { + "litellm_provider": "aiml", + "metadata": { + "notes": "DALL-E 3 via AI/ML API - High-quality text-to-image generation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.042, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux-pro": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Dev - Development version optimized for experimentation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.053, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux-pro/v1.1": { + "litellm_provider": "aiml", + "mode": "image_generation", + "output_cost_per_image": 0.042, + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux-pro/v1.1-ultra": { + "litellm_provider": "aiml", + "mode": "image_generation", + "output_cost_per_image": 0.063, + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux-realism": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro - Professional-grade image generation model" + }, + "mode": "image_generation", + "output_cost_per_image": 0.037, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux/dev": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Dev - Development version optimized for experimentation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.026, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux/kontext-max/text-to-image": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro v1.1 - Enhanced version with improved capabilities and 6x faster inference speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.084, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux/kontext-pro/text-to-image": { + 
"litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro v1.1 - Enhanced version with improved capabilities and 6x faster inference speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.042, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/flux/schnell": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Schnell - Fast generation model optimized for speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.003, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/google/imagen-4.0-ultra-generate-001": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Imagen 4.0 Ultra Generate API - Photorealistic image generation with precise text rendering" + }, + "mode": "image_generation", + "output_cost_per_image": 0.063, + "source": "https://docs.aimlapi.com/api-references/image-models/google/imagen-4-ultra-generate", + "supported_endpoints": ["/v1/images/generations"] + }, + "aiml/google/nano-banana-pro": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Gemini 3 Pro Image (Nano Banana Pro) - Advanced text-to-image generation with reasoning and 4K resolution support" + }, + "mode": "image_generation", + "output_cost_per_image": 0.1575, + "source": "https://docs.aimlapi.com/api-references/image-models/google/gemini-3-pro-image-preview", + "supported_endpoints": ["/v1/images/generations"] + }, + "amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06 + }, + "us.writer.palmyra-x4-v1:0": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "us.writer.palmyra-x5-v1:0": { + 
"input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "writer.palmyra-x4-v1:0": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "writer.palmyra-x5-v1:0": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "amazon.nova-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-7, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "apac.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-8, + "input_cost_per_token": 3.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, 
+ "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "eu.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-8, + "input_cost_per_token": 3.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "us.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-8, + "input_cost_per_token": 3.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "amazon.nova-2-multimodal-embeddings-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 8172, + "max_tokens": 8172, + "mode": "embedding", + "input_cost_per_token": 1.35e-7, + "input_cost_per_image": 6e-5, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "output_cost_per_token": 0.0, + "output_vector_size": 3072, + "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/model-catalog/serverless/amazon.nova-2-multimodal-embeddings-v1:0", + "supports_embedding_image_input": true, + "supports_image_input": true, + 
"supports_video_input": true, + "supports_audio_input": true + }, + "amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.5e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-7, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "amazon.nova-pro-v1:0": { + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon.rerank-v1:0": { + "input_cost_per_query": 0.001, + "input_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "max_document_chunks_per_query": 100, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "max_tokens_per_document_chunk": 512, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "amazon.titan-embed-image-v1": { + "input_cost_per_image": 6e-5, + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128, + "max_tokens": 128, + "metadata": { + "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
+ }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "amazon.titan-image-generator-v1": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_512_and_512_pixels": 0.01, + "output_cost_per_image_above_512_and_512_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2:0": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 
77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "us.twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-5, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "input_cost_per_image": 0.0001, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "eu.twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-5, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "input_cost_per_image": 0.0001, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-6, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_video_input": true + }, + "us.twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-6, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_video_input": true + }, + "eu.twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-6, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_video_input": true + }, + "amazon.titan-text-express-v1": { + "input_cost_per_token": 1.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-6 + }, + "amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-7, + "litellm_provider": 
"bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-7 + }, + "amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-6 + }, + "anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 8e-8, + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 
64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-7-sonnet-20240620-v1:0": { + "cache_creation_input_token_cost": 4.5e-6, + "cache_read_input_token_cost": 3.6e-7, + "input_cost_per_token": 3.6e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": 
true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_vision": true + }, + "anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-6, + "supports_tool_choice": true + }, + "anthropic.claude-opus-4-1-20250805-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + 
"anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + 
"cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-v1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5 + }, + "anthropic.claude-v2:1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "anyscale/HuggingFaceH4/zephyr-7b-beta": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-7 + }, + "anyscale/codellama/CodeLlama-34b-Instruct-hf": { + "input_cost_per_token": 1e-6, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + 
"anyscale/codellama/CodeLlama-70b-Instruct-hf": { + "input_cost_per_token": 1e-6, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-6, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" + }, + "anyscale/google/gemma-7b-it": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it" + }, + "anyscale/meta-llama/Llama-2-13b-chat-hf": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-7 + }, + "anyscale/meta-llama/Llama-2-70b-chat-hf": { + "input_cost_per_token": 1e-6, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "anyscale/meta-llama/Llama-2-7b-chat-hf": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-7 + }, + "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1e-6, + "litellm_provider": "anyscale", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-6, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" + }, + "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 
8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct" + }, + "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mistral-7B-Instruct-v0.1", + "supports_function_calling": true + }, + "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1": { + "input_cost_per_token": 9e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x22B-Instruct-v0.1", + "supports_function_calling": true + }, + "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x7B-Instruct-v0.1", + "supports_function_calling": true + }, + "apac.amazon.nova-lite-v1:0": { + "input_cost_per_token": 6.3e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.52e-7, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "apac.amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.7e-8, 
+ "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.48e-7, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "apac.amazon.nova-pro-v1:0": { + "input_cost_per_token": 8.4e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.36e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "apac.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + 
"supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-6, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "apac.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "apac.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + 
"search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "assemblyai/best": { + "input_cost_per_second": 3.333e-5, + "litellm_provider": "assemblyai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "assemblyai/nano": { + "input_cost_per_second": 0.00010278, + "litellm_provider": "assemblyai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-6, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 3.3e-6, + "input_cost_per_token_above_200k_tokens": 6.6e-6, + "output_cost_per_token_above_200k_tokens": 2.475e-5, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "azure/ada": { + "input_cost_per_token": 1e-7, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + 
"mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/codex-mini": { + "cache_read_input_token_cost": 3.75e-7, + "input_cost_per_token": 1.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 6e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/command-r-plus": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true + }, + "azure_ai/claude-haiku-4-5": { + "input_cost_per_token": 1e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-opus-4-5": { + "input_cost_per_token": 5e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-opus-4-1": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-sonnet-4-5": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/computer-use-preview": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure_ai/gpt-oss-120b": { + 
"input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.375e-6, + "input_cost_per_token": 2.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_creation_input_token_cost": 1.38e-6, + "input_cost_per_token": 2.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 8.3e-8, + "input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"azure/eu/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3.3e-7, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_token": 6.6e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2.2e-5, + "output_cost_per_token": 2.64e-6, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2.2e-5, + "cache_read_input_token_cost": 2.75e-6, + "input_cost_per_audio_token": 0.00011, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00022, + "output_cost_per_token": 2.2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_audio_token_cost": 2.5e-6, + "cache_read_input_token_cost": 2.75e-6, + "input_cost_per_audio_token": 4.4e-5, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2.2e-5, + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "azure/eu/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.375e-7, + "input_cost_per_token": 1.375e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.75e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + 
"/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5.5e-9, + "input_cost_per_token": 5.5e-8, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.4e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/o1-2024-12-17": { + "cache_read_input_token_cost": 8.25e-6, + "input_cost_per_token": 1.65e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6.6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, 
+ "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 6.05e-7, + "input_cost_per_token": 1.21e-6, + "input_cost_per_token_batches": 6.05e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "output_cost_per_token_batches": 2.42e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/eu/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 8.25e-6, + "input_cost_per_token": 1.65e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/eu/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 6.05e-7, + "input_cost_per_token": 1.21e-6, + "input_cost_per_token_batches": 6.05e-7, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "output_cost_per_token_batches": 2.42e-6, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/global-standard/gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.25e-6, + "deprecation_date": "2026-02-27", + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global-standard/gpt-4o-2024-11-20": { + "cache_read_input_token_cost": 1.25e-6, + "deprecation_date": "2026-03-01", + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global-standard/gpt-4o-mini": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 
1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-3.5-turbo": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-3.5-turbo-0125": { + "deprecation_date": "2025-03-31", + "input_cost_per_token": 5e-7, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-3.5-turbo-instruct-0914": { + 
"input_cost_per_token": 1.5e-6, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "azure/gpt-35-turbo": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-0125": { + "deprecation_date": "2025-05-31", + "input_cost_per_token": 5e-7, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-0301": { + "deprecation_date": "2025-02-13", + "input_cost_per_token": 2e-7, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-0613": { + "deprecation_date": "2025-02-13", + "input_cost_per_token": 1.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-1106": { + "deprecation_date": "2025-03-31", + "input_cost_per_token": 1e-6, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + 
"azure/gpt-35-turbo-16k": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-16k-0613": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-instruct": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "azure/gpt-35-turbo-instruct-0914": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "azure/gpt-4": { + "input_cost_per_token": 3e-5, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-0125-preview": { + "input_cost_per_token": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-0613": { + "input_cost_per_token": 3e-5, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-1106-preview": { + "input_cost_per_token": 1e-5, + 
"litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-32k": { + "input_cost_per_token": 6e-5, + "litellm_provider": "azure", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_tool_choice": true + }, + "azure/gpt-4-32k-0613": { + "input_cost_per_token": 6e-5, + "litellm_provider": "azure", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_tool_choice": true + }, + "azure/gpt-4-turbo": { + "input_cost_per_token": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-turbo-2024-04-09": { + "input_cost_per_token": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4-turbo-vision-preview": { + "input_cost_per_token": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.1": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "azure", + "max_input_tokens": 
1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "input_cost_per_token_batches": 2e-7, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "output_cost_per_token_batches": 8e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-mini-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "input_cost_per_token_batches": 2e-7, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "output_cost_per_token_batches": 8e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1e-7, + "input_cost_per_token_batches": 5e-8, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_batches": 2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.1-nano-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1e-7, + "input_cost_per_token_batches": 5e-8, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_batches": 2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.5-preview": { + "cache_read_input_token_cost": 3.75e-5, + "input_cost_per_token": 7.5e-5, + "input_cost_per_token_batches": 3.75e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00015, + "output_cost_per_token_batches": 7.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o": { + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-audio-2025-08-28": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": 
["text", "audio"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-8, + 
"input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-mini-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + 
"max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-6, + "cache_read_input_token_cost": 4e-6, + "input_cost_per_audio_token": 3.2e-5, + "input_cost_per_image": 5e-6, + "input_cost_per_token": 4e-6, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-5, + "output_cost_per_token": 1.6e-5, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + 
"azure/gpt-4o-mini-transcribe": { + "input_cost_per_audio_token": 3e-6, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-6, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "azure/gpt-4o-mini-tts": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] + }, + "azure/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2e-5, + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 0.0001, + "input_cost_per_token": 5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.0002, + "output_cost_per_token": 2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2e-5, + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true 
+ }, + "azure/gpt-4o-transcribe": { + "input_cost_per_audio_token": 6e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "azure/gpt-4o-transcribe-diarize": { + "input_cost_per_audio_token": 6e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "azure/gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.1-chat-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + 
"output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "azure/gpt-5.1-codex-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini-2025-11-13": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "output_cost_per_token_priority": 3.6e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"azure/gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "azure/gpt-5-chat-latest": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "azure/gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + 
"supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-nano": { + "cache_read_input_token_cost": 5e-9, + "input_cost_per_token": 5e-8, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5e-9, + "input_cost_per_token": 5e-8, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-pro": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "source": 
"https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + 
"max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2": { + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 1.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-2025-12-11": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": 
true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.2-chat": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-chat-2025-12-11": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 1.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-pro": { + "input_cost_per_token": 2.1e-5, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.2-pro-2025-12-11": { + "input_cost_per_token": 2.1e-5, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + 
"supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-image-1": { + "cache_read_input_image_token_cost": 2.5e-6, + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_image_token": 1e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 4e-5, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 7.629e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 6.539e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 6.539e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/high/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.59263611e-7, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 1.58945719e-7, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.58945719e-7, + "litellm_provider": "azure", + "mode": 
"image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.0490417e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 1.0172526e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.0172526e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-7, + "cache_read_input_token_cost": 2e-7, + "input_cost_per_image_token": 2.5e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 8e-6, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "azure/gpt-image-1.5": { + "cache_read_input_image_token_cost": 2e-6, + "cache_read_input_token_cost": 1.25e-6, + 
"input_cost_per_token": 5e-6, + "input_cost_per_image_token": 8e-6, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 3.2e-5, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "azure/gpt-image-1.5-2025-12-16": { + "cache_read_input_image_token_cost": 2e-6, + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 5e-6, + "input_cost_per_image_token": 8e-6, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 3.2e-5, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "azure/low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0751953125e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0751953125e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0345052083e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 7.9752604167e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 
0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.1575520833e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/mistral-large-2402": { + "input_cost_per_token": 8e-6, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_function_calling": true + }, + "azure/mistral-large-latest": { + "input_cost_per_token": 8e-6, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_function_calling": true + }, + "azure/o1": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o1-2024-12-17": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 
6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o1-mini": { + "cache_read_input_token_cost": 6.05e-7, + "input_cost_per_token": 1.21e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-preview": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_vision": false + }, + "azure/o3": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-2025-04-16": { + "deprecation_date": "2026-04-16", + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-deep-research": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_token": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": 
true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/o3-mini": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/o3-pro": { + "input_cost_per_token": 2e-5, + "input_cost_per_token_batches": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-5, + "output_cost_per_token_batches": 4e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-pro-2025-06-10": { + "input_cost_per_token": 2e-5, + "input_cost_per_token_batches": 1e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, 
+ "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-5, + "output_cost_per_token_batches": 4e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o4-mini": { + "cache_read_input_token_cost": 2.75e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o4-mini-2025-04-16": { + "cache_read_input_token_cost": 2.75e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + 
"azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 3.81469e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 4.359e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 4.359e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/text-embedding-3-large": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/text-embedding-3-small": { + "deprecation_date": "2026-04-30", + "input_cost_per_token": 2e-8, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/text-embedding-ada-002": { + "input_cost_per_token": 1e-7, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/speech/azure-tts": { + "input_cost_per_character": 1.5e-5, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, + "azure/speech/azure-tts-hd": { + "input_cost_per_character": 3e-5, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, + "azure/tts-1": { + "input_cost_per_character": 1.5e-5, + "litellm_provider": "azure", + "mode": "audio_speech" + }, + "azure/tts-1-hd": { + "input_cost_per_character": 3e-5, + "litellm_provider": "azure", + "mode": "audio_speech" + }, + "azure/us/gpt-4.1-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 2.2e-6, + 
"input_cost_per_token_batches": 1.1e-6, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8.8e-6, + "output_cost_per_token_batches": 4.4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/us/gpt-4.1-mini-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 4.4e-7, + "input_cost_per_token_batches": 2.2e-7, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.76e-6, + "output_cost_per_token_batches": 8.8e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/us/gpt-4.1-nano-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1.1e-7, + "input_cost_per_token_batches": 6e-8, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + 
"output_cost_per_token": 4.4e-7, + "output_cost_per_token_batches": 2.2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.375e-6, + "input_cost_per_token": 2.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_creation_input_token_cost": 1.38e-6, + "input_cost_per_token": 2.75e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 8.3e-8, + "input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3.3e-7, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_token": 6.6e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2.2e-5, + "output_cost_per_token": 2.64e-6, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2.2e-5, + "cache_read_input_token_cost": 2.75e-6, + "input_cost_per_audio_token": 0.00011, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00022, + "output_cost_per_token": 2.2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_audio_token_cost": 2.5e-6, + "cache_read_input_token_cost": 2.75e-6, + "input_cost_per_audio_token": 4.4e-5, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2.2e-5, + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.375e-7, + "input_cost_per_token": 1.375e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.75e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5.5e-9, + "input_cost_per_token": 5.5e-8, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 
128000, + "mode": "chat", + "output_cost_per_token": 4.4e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o1-2024-12-17": { + "cache_read_input_token_cost": 8.25e-6, + "input_cost_per_token": 1.65e-5, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", 
+ "output_cost_per_token": 6.6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 6.05e-7, + "input_cost_per_token": 1.21e-6, + "input_cost_per_token_batches": 6.05e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "output_cost_per_token_batches": 2.42e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/us/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 8.25e-6, + "input_cost_per_token": 1.65e-5, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/us/o3-2025-04-16": { + "deprecation_date": "2026-04-16", + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 2.2e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8.8e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 6.05e-7, + 
"input_cost_per_token": 1.21e-6, + "input_cost_per_token_batches": 6.05e-7, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "output_cost_per_token_batches": 2.42e-6, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/us/o4-mini-2025-04-16": { + "cache_read_input_token_cost": 3.1e-7, + "input_cost_per_token": 1.21e-6, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/whisper-1": { + "input_cost_per_second": 0.0001, + "litellm_provider": "azure", + "mode": "audio_transcription", + "output_cost_per_second": 0.0001 + }, + "azure_ai/Cohere-embed-v3-english": { + "input_cost_per_token": 1e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", + "supports_embedding_image_input": true + }, + "azure_ai/Cohere-embed-v3-multilingual": { + "input_cost_per_token": 1e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", + "supports_embedding_image_input": true + }, + "azure_ai/FLUX-1.1-pro": { + "litellm_provider": 
"azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", + "supported_endpoints": ["/v1/images/generations"] + }, + "azure_ai/FLUX.1-Kontext-pro": { + "litellm_provider": "azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", + "supported_endpoints": ["/v1/images/generations"] + }, + "azure_ai/flux.2-pro": { + "litellm_provider": "azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://ai.azure.com/explore/models/flux.2-pro/version/1/registry/azureml-blackforestlabs", + "supported_endpoints": ["/v1/images/generations"] + }, + "azure_ai/Llama-3.2-11B-Vision-Instruct": { + "input_cost_per_token": 3.7e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.7e-7, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-3.2-90B-Vision-Instruct": { + "input_cost_per_token": 2.04e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.04e-6, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 7.1e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 
128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 7.1e-7, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "input_cost_per_token": 1.41e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3.5e-7, + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 10000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 7.8e-7, + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1.1e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.7e-7, + "supports_tool_choice": true + }, + "azure_ai/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 5.33e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.6e-5, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + 
"azure_ai/Meta-Llama-3.1-70B-Instruct": { + "input_cost_per_token": 2.68e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.54e-6, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + "azure_ai/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.1e-7, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + "azure_ai/Phi-3-medium-128k-instruct": { + "input_cost_per_token": 1.7e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.8e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-medium-4k-instruct": { + "input_cost_per_token": 1.7e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.8e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-mini-128k-instruct": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + 
"azure_ai/Phi-3-mini-4k-instruct": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-small-128k-instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-small-8k-instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-MoE-instruct": { + "input_cost_per_token": 1.6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.4e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-mini-instruct": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-vision-instruct": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "azure_ai", + 
"max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Phi-4": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/affordable-innovation-unveiling-the-pricing-of-phi-3-slms-on-models-as-a-service/4156495", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-4-mini-instruct": { + "input_cost_per_token": 7.5e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112", + "supports_function_calling": true + }, + "azure_ai/Phi-4-multimodal-instruct": { + "input_cost_per_audio_token": 4e-6, + "input_cost_per_token": 8e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-7, + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_vision": true + }, + "azure_ai/Phi-4-mini-reasoning": { + "input_cost_per_token": 8e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-7, + "source": 
"https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_reasoning": true + }, + "azure_ai/Phi-4-reasoning": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "azure_ai/mistral-document-ai-2505": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" + }, + "azure_ai/doc-intelligence/prebuilt-read": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.0015, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-layout": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-document": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/MAI-DS-R1": { + "input_cost_per_token": 1.35e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-6, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + 
"supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/cohere-rerank-v3-english": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v3-multilingual": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v3.5": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v4.0-pro": { + "input_cost_per_query": 0.0025, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_query_tokens": 4096, + "max_tokens": 32768, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v4.0-fast": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_query_tokens": 4096, + "max_tokens": 32768, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/deepseek-v3.2": { + "input_cost_per_token": 5.8e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + 
"azure_ai/deepseek-v3.2-speciale": { + "input_cost_per_token": 5.8e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/deepseek-r1": { + "input_cost_per_token": 1.35e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-6, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367", + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/deepseek-v3": { + "input_cost_per_token": 1.14e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.56e-6, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", + "supports_tool_choice": true + }, + "azure_ai/deepseek-v3-0324": { + "input_cost_per_token": 1.14e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.56e-6, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/embed-v-4-0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 3072, + "source": 
"https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image"], + "supports_embedding_image_input": true + }, + "azure_ai/global/grok-3": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/global/grok-3-mini": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.27e-6, + "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-3": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-3-mini": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.27e-6, + "source": 
"https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-non-reasoning": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-reasoning": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-code-fast-1": { + "input_cost_per_token": 2e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_web_search": true + }, + "azure_ai/jais-30b-chat": { + "input_cost_per_token": 0.0032, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00971, + "source": "https://azure.microsoft.com/en-us/products/ai-services/ai-foundry/models/jais-30b-chat" + }, + "azure_ai/jamba-instruct": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_tool_choice": true + }, + "azure_ai/ministral-3b": { + "input_cost_per_token": 4e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-8, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large": { + "input_cost_per_token": 4e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-2407": { + "input_cost_per_token": 2e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + 
"output_cost_per_token": 6e-6, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-3": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://azure.microsoft.com/en-us/blog/introducing-mistral-large-3-in-microsoft-foundry-open-capable-and-ready-for-production-workloads/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/mistral-medium-2505": { + "input_cost_per_token": 4e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-nemo": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice", + "supports_function_calling": true + }, + "azure_ai/mistral-small": { + "input_cost_per_token": 1e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-small-2503": { + "input_cost_per_token": 1e-6, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-6, + 
"supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "babbage-002": { + "input_cost_per_token": 4e-7, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 4e-7 + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "input_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.001902, + "supports_tool_choice": true + }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.011, + "supports_tool_choice": true + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "input_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.0011416, + "supports_tool_choice": true + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "input_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.0066027, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.01475, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0455, + "litellm_provider": 
"bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0455 + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0455, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.008194, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02527 + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02527, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 2.23e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7.55e-6, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + 
"bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.18e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-6 + }, + "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-7 + }, + "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.05e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.03e-6 + }, + "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.9e-7 + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.01635, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0415 + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + 
"input_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0415, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.009083, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02305 + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02305, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 2.48e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.38e-6, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5 + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + 
"bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.86e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.78e-6 + }, + "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.5e-7 + }, + "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.45e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.55e-6 + }, + "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.9e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.8e-7 + }, + "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.6e-7, + "supports_tool_choice": true + }, + "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 1.04e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3.12e-5, + "supports_function_calling": true + }, + "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 5.9e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 9.1e-7, + "supports_tool_choice": true + }, + "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { + 
"input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Anthropic via Invoke route does not currently support pdf input." + }, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 4.45e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.88e-6 + }, + "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.01e-6 + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.011, + "supports_tool_choice": true + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175 + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175, + "supports_tool_choice": true + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "max_input_tokens": 
100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00611, + "supports_tool_choice": true + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972 + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-6, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-6 + }, + "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_function_calling": true + }, + "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_tool_choice": true + }, + "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": { + "input_cost_per_token": 9.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.84e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "bedrock/us-gov-east-1/amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "bedrock/us-gov-east-1/amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "bedrock/us-gov-east-1/amazon.titan-text-express-v1": { + 
"input_cost_per_token": 1.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-6 + }, + "bedrock/us-gov-east-1/amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-7 + }, + "bedrock/us-gov-east-1/amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-6 + }, + "bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3.6e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-east-1/anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { + "input_cost_per_token": 3.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.5e-6, + "supports_pdf_input": true + }, + "bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.65e-6, + "supports_pdf_input": true + }, + "bedrock/us-gov-west-1/amazon.nova-pro-v1:0": { + "input_cost_per_token": 9.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.84e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "bedrock/us-gov-west-1/amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "bedrock/us-gov-west-1/amazon.titan-text-express-v1": { + "input_cost_per_token": 1.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-6 + }, + 
"bedrock/us-gov-west-1/amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-7 + }, + "bedrock/us-gov-west-1/amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-6 + }, + "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 4.5e-6, + "cache_read_input_token_cost": 3.6e-7, + "input_cost_per_token": 3.6e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3.6e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { + "input_cost_per_token": 3.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.5e-6, + "supports_pdf_input": true + }, + "bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.65e-6, + "supports_pdf_input": true + }, + "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-6 + }, + "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.011, + 
"supports_tool_choice": true + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175 + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175, + "supports_tool_choice": true + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00611, + "supports_tool_choice": true + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972 + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-6, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", 
+ "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_function_calling": true + }, + "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_tool_choice": true + }, + "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 8e-8, + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "cerebras/llama-3.3-70b": { + "input_cost_per_token": 8.5e-7, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + 
"output_cost_per_token": 1.2e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/llama3.1-70b": { + "input_cost_per_token": 6e-7, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/llama3.1-8b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/gpt-oss-120b": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "cerebras", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.9e-7, + "source": "https://www.cerebras.ai/blog/openai-gpt-oss-120b-runs-fastest-on-cerebras", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "cerebras/qwen-3-32b": { + "input_cost_per_token": 4e-7, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 8e-7, + "source": "https://inference-docs.cerebras.ai/support/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/zai-glm-4.6": { + "deprecation_date": "2026-01-20", + "input_cost_per_token": 2.25e-6, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "source": "https://www.cerebras.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": 
true + }, + "cerebras/zai-glm-4.7": { + "input_cost_per_token": 2.25e-6, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "source": "https://www.cerebras.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "chat-bison": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-chat-models", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "chat-bison-32k": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-chat-models", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "chat-bison-32k@002": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-chat-models", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "chat-bison@001": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-chat-models", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + 
"output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "chat-bison@002": { + "deprecation_date": "2025-04-09", + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-chat-models", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "chatdolphin": { + "input_cost_per_token": 5e-7, + "litellm_provider": "nlp_cloud", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 5e-7 + }, + "chatgpt-4o-latest": { + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-transcribe-diarize": { + "input_cost_per_audio_token": 6e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "claude-3-5-haiku-20241022": { + "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 8e-8, + "deprecation_date": "2025-10-01", + "input_cost_per_token": 8e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 
200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 264 + }, + "claude-3-5-haiku-latest": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 1e-7, + "deprecation_date": "2025-10-01", + "input_cost_per_token": 1e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 264 + }, + "claude-haiku-4-5-20251001": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "claude-3-5-sonnet-20240620": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2025-06-01", + "input_cost_per_token": 3e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2025-10-01", + "input_cost_per_token": 3e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + 
"search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-5-sonnet-latest": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2025-06-01", + "input_cost_per_token": 3e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-7-sonnet-20250219": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2026-02-19", + "input_cost_per_token": 3e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-7-sonnet-latest": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2025-06-01", + "input_cost_per_token": 3e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-haiku-20240307": { + "cache_creation_input_token_cost": 3e-7, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264 + }, + "claude-3-opus-20240229": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 1.5e-6, + "deprecation_date": "2026-05-01", + "input_cost_per_token": 1.5e-5, + 
"litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 395 + }, + "claude-3-opus-latest": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 1.5e-6, + "deprecation_date": "2025-03-01", + "input_cost_per_token": 1.5e-5, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 395 + }, + "claude-4-opus-20250514": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-4-sonnet-20250514": { + "cache_creation_input_token_cost": 3.75e-6, + 
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + 
"claude-sonnet-4-5-20250929": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 346 + }, + "claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-1": { + 
"cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 3e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-1-20250805": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 3e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "deprecation_date": "2026-08-05", + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-20250514": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 3e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "deprecation_date": "2026-05-14", + 
"litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5-20251101": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_creation_input_token_cost_above_1hr": 1e-5, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_creation_input_token_cost_above_1hr": 1e-5, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + 
"search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-sonnet-4-20250514": { + "deprecation_date": "2026-05-14", + "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "input_cost_per_token": 1.923e-6, + "litellm_provider": "cloudflare", + "max_input_tokens": 3072, + "max_output_tokens": 3072, + "max_tokens": 3072, + "mode": "chat", + "output_cost_per_token": 1.923e-6 + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "input_cost_per_token": 1.923e-6, + "litellm_provider": "cloudflare", + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + 
"output_cost_per_token": 1.923e-6 + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "input_cost_per_token": 1.923e-6, + "litellm_provider": "cloudflare", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.923e-6 + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "input_cost_per_token": 1.923e-6, + "litellm_provider": "cloudflare", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.923e-6 + }, + "code-bison": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "code-bison-32k@002": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison@001": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": 
"vertex_ai-code-text-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison@002": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 2048, + "max_output_tokens": 64, + "max_tokens": 64, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko-latest": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 2048, + "max_output_tokens": 64, + "max_tokens": 64, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko@001": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 2048, + "max_output_tokens": 64, + "max_tokens": 64, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko@002": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-text-models", + "max_input_tokens": 2048, + "max_output_tokens": 64, + "max_tokens": 64, + "mode": "completion", + 
"output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codechat-bison-32k": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codechat-bison-32k@002": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codechat-bison@001": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codechat-bison@002": { + 
"input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codechat-bison@latest": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-code-chat-models", + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "codestral/codestral-2405": { + "input_cost_per_token": 0.0, + "litellm_provider": "codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "codestral/codestral-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "codex-mini-latest": { + "cache_read_input_token_cost": 3.75e-7, + "input_cost_per_token": 1.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 6e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "cohere.command-light-text-v14": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_tool_choice": true + }, + "cohere.command-r-plus-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_tool_choice": true + }, + "cohere.command-r-v1:0": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_tool_choice": true + }, + "cohere.command-text-v14": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_tool_choice": true + }, + "cohere.embed-english-v3": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "cohere.embed-multilingual-v3": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "cohere.embed-v4:0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": 
"bedrock", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, + "cohere/embed-v4.0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "cohere", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, + "cohere.rerank-v3-5:0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "max_document_chunks_per_query": 100, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "max_tokens_per_document_chunk": 512, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "command": { + "input_cost_per_token": 1e-6, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "command-a-03-2025": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "cohere_chat", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-light": { + "input_cost_per_token": 3e-7, + "litellm_provider": "cohere_chat", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_tool_choice": true + }, + "command-nightly": { + "input_cost_per_token": 1e-6, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "command-r": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + 
"max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-08-2024": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-plus": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-plus-08-2024": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r7b-12-2024": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.75e-8, + "source": "https://docs.cohere.com/v2/docs/command-r7b", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "computer-use-preview": { + "input_cost_per_token": 3e-6, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "dall-e-2": { + "input_cost_per_image": 0.02, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits", + "/v1/images/variations" + ] + }, + "dall-e-3": { + "input_cost_per_image": 0.04, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "deepseek-chat": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-7, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "deepseek-reasoner": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.2e-7, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false + }, + "dashscope/qwen-coder": { + "input_cost_per_token": 3e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 
1.5e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-flash": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "range": [0, 256000.0] + }, + { + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen-flash-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "range": [0, 256000.0] + }, + { + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen-max": { + "input_cost_per_token": 1.6e-6, + "litellm_provider": "dashscope", + "max_input_tokens": 30720, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.4e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": 
"https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-01-25": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-04-28": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-6, + "output_cost_per_token": 1.2e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-07-14": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-6, + "output_cost_per_token": 1.2e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-6, + "output_cost_per_token": 1.2e-6, + "range": [0, 
256000.0] + }, + { + "input_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 1.2e-5, + "output_cost_per_token": 3.6e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen-plus-2025-09-11": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-6, + "output_cost_per_token": 1.2e-6, + "range": [0, 256000.0] + }, + { + "input_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 1.2e-5, + "output_cost_per_token": 3.6e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen-plus-latest": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-6, + "output_cost_per_token": 1.2e-6, + "range": [0, 256000.0] + }, + { + "input_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 1.2e-5, + "output_cost_per_token": 3.6e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen-turbo": { + "input_cost_per_token": 5e-8, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-7, + "output_cost_per_token": 2e-7, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + 
"dashscope/qwen-turbo-2024-11-01": { + "input_cost_per_token": 5e-8, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-turbo-2025-04-28": { + "input_cost_per_token": 5e-8, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-7, + "output_cost_per_token": 2e-7, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-turbo-latest": { + "input_cost_per_token": 5e-8, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-7, + "output_cost_per_token": 2e-7, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-30b-a3b": { + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-coder-flash": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + 
"supports_tool_choice": true, + "tiered_pricing": [ + { + "cache_read_input_token_cost": 8e-8, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.5e-6, + "range": [0, 32000.0] + }, + { + "cache_read_input_token_cost": 1.2e-7, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 2.5e-6, + "range": [32000.0, 128000.0] + }, + { + "cache_read_input_token_cost": 2e-7, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 4e-6, + "range": [128000.0, 256000.0] + }, + { + "cache_read_input_token_cost": 4e-7, + "input_cost_per_token": 1.6e-6, + "output_cost_per_token": 9.6e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen3-coder-flash-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.5e-6, + "range": [0, 32000.0] + }, + { + "input_cost_per_token": 5e-7, + "output_cost_per_token": 2.5e-6, + "range": [32000.0, 128000.0] + }, + { + "input_cost_per_token": 8e-7, + "output_cost_per_token": 4e-6, + "range": [128000.0, 256000.0] + }, + { + "input_cost_per_token": 1.6e-6, + "output_cost_per_token": 9.6e-6, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen3-coder-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "output_cost_per_token": 5e-6, + "range": [0, 32000.0] + }, + { + "cache_read_input_token_cost": 1.8e-7, + 
"input_cost_per_token": 1.8e-6, + "output_cost_per_token": 9e-6, + "range": [32000.0, 128000.0] + }, + { + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "range": [128000.0, 256000.0] + }, + { + "cache_read_input_token_cost": 6e-7, + "input_cost_per_token": 6e-6, + "output_cost_per_token": 6e-5, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen3-coder-plus-2025-07-22": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1e-6, + "output_cost_per_token": 5e-6, + "range": [0, 32000.0] + }, + { + "input_cost_per_token": 1.8e-6, + "output_cost_per_token": 9e-6, + "range": [32000.0, 128000.0] + }, + { + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "range": [128000.0, 256000.0] + }, + { + "input_cost_per_token": 6e-6, + "output_cost_per_token": 6e-5, + "range": [256000.0, 1000000.0] + } + ] + }, + "dashscope/qwen3-max-preview": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 6e-6, + "range": [0, 32000.0] + }, + { + "input_cost_per_token": 2.4e-6, + "output_cost_per_token": 1.2e-5, + "range": [32000.0, 128000.0] + }, + { + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "range": [128000.0, 252000.0] + } + ] + }, + "dashscope/qwq-plus": { + "input_cost_per_token": 8e-7, + "litellm_provider": "dashscope", + 
"max_input_tokens": 98304, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-6, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-bge-large-en": { + "input_cost_per_token": 1.0003e-7, + "input_dbu_cost_per_token": 1.429e-6, + "litellm_provider": "databricks", + "max_input_tokens": 512, + "max_tokens": 512, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-claude-3-7-sonnet": { + "input_cost_per_token": 2.9999900000000002e-6, + "input_dbu_cost_per_token": 4.2857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-5, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 1.00002e-6, + "input_dbu_cost_per_token": 1.4286e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.00003e-6, + "output_dbu_cost_per_token": 7.1429e-5, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 1.5000020000000002e-5, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-5, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 1.5000020000000002e-5, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-5, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-5": { + "input_cost_per_token": 5.00003e-6, + "input_dbu_cost_per_token": 7.1429e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 2.5000010000000002e-5, + "output_dbu_cost_per_token": 0.000357143, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4": { + "input_cost_per_token": 2.9999900000000002e-6, + "input_dbu_cost_per_token": 4.2857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-5, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-1": { + "input_cost_per_token": 2.9999900000000002e-6, + "input_dbu_cost_per_token": 4.2857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-5, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-5": { + "input_cost_per_token": 2.9999900000000002e-6, + "input_dbu_cost_per_token": 4.2857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-5, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-flash": { + "input_cost_per_token": 3.0001999999999996e-7, + "input_dbu_cost_per_token": 4.285999999999999e-6, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 2.49998e-6, + "output_dbu_cost_per_token": 3.5714e-5, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-pro": { + "input_cost_per_token": 1.24999e-6, + "input_dbu_cost_per_token": 1.7857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-6, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemma-3-12b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 2.1429999999999996e-6, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.0001e-7, + "output_dbu_cost_per_token": 7.143e-6, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-5": { + "input_cost_per_token": 1.24999e-6, + "input_dbu_cost_per_token": 1.7857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-6, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-1": { + "input_cost_per_token": 1.24999e-6, + "input_dbu_cost_per_token": 1.7857e-5, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-6, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-mini": { + "input_cost_per_token": 2.4997000000000006e-7, + "input_dbu_cost_per_token": 3.571e-6, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.9999700000000004e-6, + "output_dbu_cost_per_token": 2.8571e-5, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-nano": { + "input_cost_per_token": 4.998e-8, + "input_dbu_cost_per_token": 7.14e-7, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 3.9998000000000007e-7, + "output_dbu_cost_per_token": 5.714000000000001e-6, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-oss-120b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 2.1429999999999996e-6, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.9997e-7, + "output_dbu_cost_per_token": 8.571e-6, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-oss-20b": { + "input_cost_per_token": 7e-8, + "input_dbu_cost_per_token": 1e-6, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.0001999999999996e-7, + "output_dbu_cost_per_token": 4.285999999999999e-6, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gte-large-en": { + "input_cost_per_token": 1.2999000000000001e-7, + "input_dbu_cost_per_token": 1.857e-6, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-llama-2-70b-chat": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 7.143e-6, + "litellm_provider": "databricks", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-6, + "output_dbu_cost_per_token": 2.1429e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-llama-4-maverick": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 7.143e-6, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Databricks documentation now provides both DBU costs (_dbu_cost_per_token) and dollar costs(_cost_per_token)." + }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-6, + "output_dbu_cost_per_token": 2.1429e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-1-405b-instruct": { + "input_cost_per_token": 5.00003e-6, + "input_dbu_cost_per_token": 7.1429e-5, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-5, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-1-8b-instruct": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 2.1429999999999996e-6, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 4.5003000000000007e-7, + "output_dbu_cost_per_token": 6.429000000000001e-6, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-meta-llama-3-3-70b-instruct": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 7.143e-6, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-6, + "output_dbu_cost_per_token": 2.1429e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-70b-instruct": { + "input_cost_per_token": 1.00002e-6, + "input_dbu_cost_per_token": 1.4286e-5, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 2.9999900000000002e-6, + "output_dbu_cost_per_token": 4.2857e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mixtral-8x7b-instruct": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 7.143e-6, + "litellm_provider": "databricks", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.00002e-6, + "output_dbu_cost_per_token": 1.4286e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mpt-30b-instruct": { + "input_cost_per_token": 1.00002e-6, + "input_dbu_cost_per_token": 1.4286e-5, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.00002e-6, + "output_dbu_cost_per_token": 1.4286e-5, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mpt-7b-instruct": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 7.143e-6, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "dataforseo/search": { + "input_cost_per_query": 0.003, + "litellm_provider": "dataforseo", + "mode": "search" + }, + "davinci-002": { + "input_cost_per_token": 2e-6, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "deepgram/base": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/base-conversationalai": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": 
["/v1/audio/transcriptions"] + }, + "deepgram/base-finance": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/base-general": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/base-meeting": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/base-phonecall": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/base-video": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": 
["/v1/audio/transcriptions"] + }, + "deepgram/base-voicemail": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/enhanced": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/enhanced-finance": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/enhanced-general": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/enhanced-meeting": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": 
["/v1/audio/transcriptions"] + }, + "deepgram/enhanced-phonecall": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-atc": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-automotive": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + 
}, + "deepgram/nova-2-conversationalai": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-drivethru": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-finance": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-general": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-meeting": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + 
"deepgram/nova-2-phonecall": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-video": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-2-voicemail": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-3": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-3-general": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + 
"deepgram/nova-3-medical": { + "input_cost_per_second": 8.667e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0052/60 seconds = $0.00008667 per second (multilingual)", + "original_pricing_per_minute": 0.0052 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-general": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/nova-phonecall": { + "input_cost_per_second": 7.167e-5, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/whisper": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/whisper-base": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + 
"deepgram/whisper-large": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/whisper-medium": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/whisper-small": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepgram/whisper-tiny": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 9e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 
1e-6, + "output_cost_per_token": 1e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Qwen/QwQ-32B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen2.5-72B-Instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen2.5-7B-Instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_vision": true + }, + "deepinfra/Qwen/Qwen3-14B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1.8e-7, + 
"output_cost_per_token": 5.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 2.9e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-30B-A3B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 2.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-32B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 1.6e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + 
"input_cost_per_token": 1.4e-7, + "output_cost_per_token": 1.4e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 1.4e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/allenai/olmOCR-7B-0725-FP8": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1.5e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/anthropic/claude-3-7-sonnet-latest": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 3.3e-6, + "output_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 3.3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/anthropic/claude-4-opus": { + "max_tokens": 200000, + 
"max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 1.65e-5, + "output_cost_per_token": 8.25e-5, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/anthropic/claude-4-sonnet": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 3.3e-6, + "output_cost_per_token": 1.65e-5, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 2.4e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 2.15e-6, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-6, + "output_cost_per_token": 3e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + 
}, + "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-6, + "output_cost_per_token": 3e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 8.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1e-6, + "cache_read_input_token_cost": 2.16e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_reasoning": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1e-6, + "cache_read_input_token_cost": 2.16e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemini-2.0-flash-001": { + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemini-2.5-flash": { + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 3e-7, + 
"output_cost_per_token": 2.5e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemini-2.5-pro": { + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 1.25e-6, + "output_cost_per_token": 1e-5, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemma-3-12b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemma-3-27b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 1.6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/google/gemma-3-4b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4.9e-8, + "output_cost_per_token": 4.9e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Llama-3.2-3B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 
2.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "max_tokens": 1048576, + "max_input_tokens": 1048576, + "max_output_tokens": 1048576, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "max_tokens": 327680, + "max_input_tokens": 327680, + "max_output_tokens": 327680, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-Guard-3-8B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5.5e-8, + "output_cost_per_token": 5.5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Llama-Guard-4-12B": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 6e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 131072, + 
"max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/microsoft/WizardLM-2-8x22B": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 4.8e-7, + "output_cost_per_token": 4.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/microsoft/phi-4": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 1.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 4e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + 
"deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 2e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 2e-6, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 
4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/openai/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/openai/gpt-oss-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/zai-org/GLM-4.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 1.6e-6, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepseek/deepseek-chat": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-coder": { + "input_cost_per_token": 1.4e-7, + "input_cost_per_token_cache_hit": 1.4e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-7, + 
"supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "litellm_provider": "deepseek", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-reasoner": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-v3": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 7e-8, + "input_cost_per_token": 2.7e-7, + "input_cost_per_token_cache_hit": 7e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.1e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_tool_choice": true + }, + "deepseek.v3-v1:0": { + "input_cost_per_token": 5.8e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 81920, + "max_tokens": 81920, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dolphin": { + "input_cost_per_token": 5e-7, + "litellm_provider": "nlp_cloud", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "completion", + "output_cost_per_token": 5e-7 + }, + "doubao-embedding": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - standard version with 2560 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "doubao-embedding-large": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - large version with 2048 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2048 + }, + "doubao-embedding-large-text-240915": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - text-240915 version with 4096 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 4096 + }, + "doubao-embedding-large-text-250515": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - text-250515 version with 2048 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + 
"output_vector_size": 2048 + }, + "doubao-embedding-text-240715": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "exa_ai/search": { + "litellm_provider": "exa_ai", + "mode": "search", + "tiered_pricing": [ + { + "input_cost_per_query": 0.005, + "max_results_range": [0, 25] + }, + { + "input_cost_per_query": 0.025, + "max_results_range": [26, 100] + } + ] + }, + "firecrawl/search": { + "litellm_provider": "firecrawl", + "mode": "search", + "tiered_pricing": [ + { + "input_cost_per_query": 0.00166, + "max_results_range": [1, 10] + }, + { + "input_cost_per_query": 0.00332, + "max_results_range": [11, 20] + }, + { + "input_cost_per_query": 0.00498, + "max_results_range": [21, 30] + }, + { + "input_cost_per_query": 0.00664, + "max_results_range": [31, 40] + }, + { + "input_cost_per_query": 0.0083, + "max_results_range": [41, 50] + }, + { + "input_cost_per_query": 0.00996, + "max_results_range": [51, 60] + }, + { + "input_cost_per_query": 0.01162, + "max_results_range": [61, 70] + }, + { + "input_cost_per_query": 0.01328, + "max_results_range": [71, 80] + }, + { + "input_cost_per_query": 0.01494, + "max_results_range": [81, 90] + }, + { + "input_cost_per_query": 0.0166, + "max_results_range": [91, 100] + } + ], + "metadata": { + "notes": "Firecrawl search pricing: $83 for 100,000 credits, 2 credits per 10 results. Cost = ceiling(limit/10) * 2 * $0.00083" + } + }, + "perplexity/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "perplexity", + "mode": "search" + }, + "searxng/search": { + "litellm_provider": "searxng", + "mode": "search", + "input_cost_per_query": 0.0, + "metadata": { + "notes": "SearXNG is an open-source metasearch engine. 
Free to use when self-hosted or using public instances." + } + }, + "elevenlabs/scribe_v1": { + "input_cost_per_second": 6.11e-5, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", + "notes": "ElevenLabs Scribe v1 - state-of-the-art speech recognition model with 99 language support", + "original_pricing_per_hour": 0.22 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "elevenlabs/scribe_v1_experimental": { + "input_cost_per_second": 6.11e-5, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", + "notes": "ElevenLabs Scribe v1 experimental - enhanced version of the main Scribe model", + "original_pricing_per_hour": 0.22 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "embed-english-light-v2.0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-light-v3.0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-v2.0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-v3.0": { + "input_cost_per_image": 0.0001, + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "metadata": { + "notes": "'supports_image_input' is a deprecated field. 
Use 'supports_embedding_image_input' instead." + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "embed-multilingual-v2.0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 768, + "max_tokens": 768, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-multilingual-v3.0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "embed-multilingual-light-v3.0": { + "input_cost_per_token": 0.0001, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "eu.amazon.nova-lite-v1:0": { + "input_cost_per_token": 7.8e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.12e-7, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "eu.amazon.nova-micro-v1:0": { + "input_cost_per_token": 4.6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.84e-7, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "eu.amazon.nova-pro-v1:0": { + "input_cost_per_token": 1.05e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 4.2e-6, + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-6, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 1.1e-6, + "deprecation_date": "2026-10-15", + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + 
"supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "eu.anthropic.claude-opus-4-1-20250805-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + 
"input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-6, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 3.3e-6, + "input_cost_per_token_above_200k_tokens": 6.6e-6, + "output_cost_per_token_above_200k_tokens": 2.475e-5, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, 
+ "eu.meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.3e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.9e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.9e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.mistral.pixtral-large-2502-v1:0": { + "input_cost_per_token": 2e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "fal_ai/bria/text-to-image/3.2": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/flux-pro/v1.1": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/flux-pro/v1.1-ultra": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/flux/schnell": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.003, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": { + 
"litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/ideogram/v3": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/imagen4/preview": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/imagen4/preview/fast": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/imagen4/preview/ultra": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/recraft/v3/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/stable-diffusion-v35-medium": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "featherless_ai/featherless-ai/Qwerky-72B": { + "litellm_provider": "featherless_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat" + }, + "featherless_ai/featherless-ai/Qwerky-QwQ-32B": { + "litellm_provider": "featherless_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat" + }, + "fireworks-ai-4.1b-to-16b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 2e-7 + }, + "fireworks-ai-56b-to-176b": { + "input_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 1.2e-6 + }, + 
"fireworks-ai-above-16b": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 9e-7 + }, + "fireworks-ai-default": { + "input_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-embedding-150m-to-350m": { + "input_cost_per_token": 1.6e-8, + "litellm_provider": "fireworks_ai-embedding-models", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-embedding-up-to-150m": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-moe-up-to-56b": { + "input_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 5e-7 + }, + "fireworks-ai-up-to-4b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 2e-7 + }, + "fireworks_ai/WhereIsAI/UAE-Large-V1": { + "input_cost_per_token": 1.6e-8, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { + "input_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1": { + "input_cost_per_token": 3e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 8e-6, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + 
"fireworks_ai/accounts/fireworks/models/deepseek-r1-0528": { + "input_cost_per_token": 3e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 160000, + "max_output_tokens": 160000, + "max_tokens": 160000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-basic": { + "input_cost_per_token": 5.5e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 2.19e-6, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3-0324": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3-0324", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p1": { + "input_cost_per_token": 5.6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "source": "https://fireworks.ai/pricing", + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + 
"fireworks_ai/accounts/fireworks/models/deepseek-v3p1-terminus": { + "input_cost_per_token": 5.6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "source": "https://fireworks.ai/pricing", + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p2": { + "input_cost_per_token": 5.6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3p2", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/firefunction-v2": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5": { + "input_cost_per_token": 5.5e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": "chat", + "output_cost_per_token": 2.19e-6, + "source": "https://fireworks.ai/models/fireworks/glm-4p5", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5-air": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": 
"chat", + "output_cost_per_token": 8.8e-7, + "source": "https://artificialanalysis.ai/models/glm-4-5-air", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p6": { + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 2.19e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-20b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct": { + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", + "supports_function_calling": true, + "supports_response_schema": 
true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905": { + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://app.fireworks.ai/models/fireworks/kimi-k2-instruct-0905", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { + "input_cost_per_token": 3e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + 
"max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-90b-vision-instruct": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "fireworks_ai/accounts/fireworks/models/llama4-maverick-instruct-basic": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8.8e-7, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + 
"fireworks_ai/accounts/fireworks/models/llama4-scout-instruct-basic": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { + "input_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": { + "input_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/yi-large": { + "input_cost_per_token": 3e-6, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://fireworks.ai/pricing", + 
"supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-base": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-large": { + "input_cost_per_token": 1.6e-8, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "friendliai/meta-llama-3.1-70b-instruct": { + "input_cost_per_token": 6e-7, + "litellm_provider": "friendliai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "friendliai/meta-llama-3.1-8b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "friendliai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": 
true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:babbage-002": { + "input_cost_per_token": 1.6e-6, + "input_cost_per_token_batches": 2e-7, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 1.6e-6, + "output_cost_per_token_batches": 2e-7 + }, + "ft:davinci-002": { + "input_cost_per_token": 1.2e-5, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 1e-6 + }, + "ft:gpt-3.5-turbo": { + "input_cost_per_token": 3e-6, + "input_cost_per_token_batches": 1.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "output_cost_per_token_batches": 3e-6, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-0125": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-0613": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-1106": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "ft:gpt-4-0613": { + "input_cost_per_token": 3e-5, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.875e-6, + "input_cost_per_token": 3.75e-6, + "input_cost_per_token_batches": 1.875e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_batches": 7.5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "ft:gpt-4o-2024-11-20": { + "cache_creation_input_token_cost": 1.875e-6, + "input_cost_per_token": 3.75e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 3e-7, + "input_cost_per_token_batches": 1.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + 
"output_cost_per_token_batches": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 7.5e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_batches": 1.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 6e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 2e-7, + "input_cost_per_token": 8e-7, + "input_cost_per_token_batches": 4e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3.2e-6, + "output_cost_per_token_batches": 1.6e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_batches": 1e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-7, + "output_cost_per_token_batches": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": 
true, + "supports_tool_choice": true + }, + "ft:o4-mini-2025-04-16": { + "cache_read_input_token_cost": 1e-6, + "input_cost_per_token": 4e-6, + "input_cost_per_token_batches": 2e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 1.6e-5, + "output_cost_per_token_batches": 8e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "gemini-1.0-pro": { + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-1.0-pro-001": { + "deprecation_date": "2025-04-09", + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-1.0-pro-002": { + "deprecation_date": "2025-04-09", + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 
0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-1.0-pro-vision": { + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-vision-models", + "max_images_per_prompt": 16, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_tokens": 2048, + "max_video_length": 2, + "max_videos_per_prompt": 1, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.0-pro-vision-001": { + "deprecation_date": "2025-04-09", + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-vision-models", + "max_images_per_prompt": 16, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_tokens": 2048, + "max_video_length": 2, + "max_videos_per_prompt": 1, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.0-ultra": { + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": 
"vertex_ai-language-models", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-1.0-ultra-001": { + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. 
Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-1.5-flash": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 2e-6, + "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_character": 1.875e-8, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image": 2e-5, + "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1e-6, + "input_cost_per_video_per_second": 2e-5, + "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 7.5e-8, + "output_cost_per_character_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-flash-001": { + "deprecation_date": "2025-05-24", + "input_cost_per_audio_per_second": 2e-6, + "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_character": 1.875e-8, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image": 2e-5, + "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_token": 7.5e-8, + 
"input_cost_per_token_above_128k_tokens": 1e-6, + "input_cost_per_video_per_second": 2e-5, + "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 7.5e-8, + "output_cost_per_character_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-flash-002": { + "deprecation_date": "2025-09-24", + "input_cost_per_audio_per_second": 2e-6, + "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_character": 1.875e-8, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image": 2e-5, + "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1e-6, + "input_cost_per_video_per_second": 2e-5, + "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 7.5e-8, + "output_cost_per_character_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + 
"output_cost_per_token_above_128k_tokens": 6e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-flash-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 2e-6, + "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_character": 1.875e-8, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image": 2e-5, + "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_token": 4.688e-9, + "input_cost_per_token_above_128k_tokens": 1e-6, + "input_cost_per_video_per_second": 2e-5, + "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 1.875e-8, + "output_cost_per_character_above_128k_tokens": 3.75e-8, + "output_cost_per_token": 4.6875e-9, + "output_cost_per_token_above_128k_tokens": 9.375e-9, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-flash-preview-0514": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 2e-6, + "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_character": 1.875e-8, + 
"input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image": 2e-5, + "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1e-6, + "input_cost_per_video_per_second": 2e-5, + "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 1.875e-8, + "output_cost_per_character_above_128k_tokens": 3.75e-8, + "output_cost_per_token": 4.6875e-9, + "output_cost_per_token_above_128k_tokens": 9.375e-9, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-pro": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 5e-6, + 
"output_cost_per_token_above_128k_tokens": 1e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-pro-001": { + "deprecation_date": "2025-05-24", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 5e-6, + "output_cost_per_token_above_128k_tokens": 1e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-pro-002": { + "deprecation_date": "2025-09-24", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + 
"input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 5e-6, + "output_cost_per_token_above_128k_tokens": 1e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-1.5-pro-preview-0215": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 3.125e-7, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true + }, + "gemini-1.5-pro-preview-0409": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 3.125e-7, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "gemini-1.5-pro-preview-0514": { + "deprecation_date": "2025-09-29", + "input_cost_per_audio_per_second": 3.125e-5, + "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_character": 3.125e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "input_cost_per_image": 0.00032875, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_character": 1.25e-6, + "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_token": 3.125e-7, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gemini-2.0-flash": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-001": { + "cache_read_input_token_cost": 3.75e-8, + "deprecation_date": "2026-02-05", + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + 
"max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-exp": { + "cache_read_input_token_cost": 3.75e-8, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 6e-7, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-lite": { + "cache_read_input_token_cost": 1.875e-8, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-lite-001": { + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-02-25", + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-live-preview-04-09": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 3e-6, + "input_cost_per_image": 3e-6, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 3e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_token": 2e-6, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini-2.0-flash-preview-image-generation": { + "deprecation_date": "2025-11-14", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + 
"max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-thinking-exp": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-flash-thinking-exp-01-21": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": false, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.0-pro-exp-02-05": { + "cache_read_input_token_cost": 3.125e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + 
"max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-5, + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000 + }, + "gemini-2.5-flash-image-preview": { + "deprecation_date": "2026-01-15", + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_image_token": 3e-7, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + 
"max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-5, + "output_cost_per_reasoning_token": 3e-5, + "output_cost_per_token": 3e-5, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + 
"input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-lite-preview-09-2025": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 3e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": 
["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-preview-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 3e-6, + "input_cost_per_token": 3e-7, + 
"litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_token": 2e-6, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 3e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_token": 2e-6, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-preview-04-17": { + "cache_read_input_token_cost": 3.75e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + 
"max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_token": 6e-7, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-preview-05-20": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-pro": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + 
"max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, 
+ "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 5e-7, + "input_cost_per_audio_token": 1e-6, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-pro-exp-03-25": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + 
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-pro-preview-03-25": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 1.25e-6, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + 
}, + "gemini-2.5-pro-preview-05-06": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 1.25e-6, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supported_regions": ["global"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-pro-preview-06-05": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 1.25e-6, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + 
"mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-pro-preview-tts": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 
1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/computer-use", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-embedding-001": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "gemini-flash-experimental": { + "input_cost_per_character": 0, + "input_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_token": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", + "supports_function_calling": false, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-pro": { + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-pro-experimental": { + "input_cost_per_character": 0, + "input_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_token": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", + "supports_function_calling": false, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-pro-vision": { + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-vision-models", + "max_images_per_prompt": 16, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_tokens": 2048, + "max_video_length": 2, + "max_videos_per_prompt": 1, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/gemini-embedding-001": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "gemini", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#model-versions", + "tpm": 10000000 + }, + "gemini/gemini-1.5-flash": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + 
"max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-001": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2025-05-24", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-002": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2025-09-24", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, + "source": 
"https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-8b": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-8b-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-8b-exp-0924": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + 
"input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-latest": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 
3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 3.5e-6, + "input_cost_per_token_above_128k_tokens": 7e-6, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-5, + "output_cost_per_token_above_128k_tokens": 2.1e-5, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-001": { + "deprecation_date": "2025-05-24", + "input_cost_per_token": 3.5e-6, + "input_cost_per_token_above_128k_tokens": 7e-6, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-5, + "output_cost_per_token_above_128k_tokens": 2.1e-5, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-002": { + "deprecation_date": "2025-09-24", + "input_cost_per_token": 3.5e-6, + "input_cost_per_token_above_128k_tokens": 7e-6, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-5, + "output_cost_per_token_above_128k_tokens": 2.1e-5, + 
"rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-exp-0801": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 3.5e-6, + "input_cost_per_token_above_128k_tokens": 7e-6, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-5, + "output_cost_per_token_above_128k_tokens": 2.1e-5, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-latest": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 3.5e-6, + "input_cost_per_token_above_128k_tokens": 7e-6, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-5, + "output_cost_per_token_above_128k_tokens": 2.1e-5, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-2.0-flash": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "rpm": 10000, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.0-flash-001": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "rpm": 10000, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.0-flash-exp": { + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini/gemini-2.0-flash-lite": { + "cache_read_input_token_cost": 1.875e-8, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + 
"max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "rpm": 4000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini/gemini-2.0-flash-lite-preview-02-05": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.875e-8, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "rpm": 60000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.0-flash-live-001": { + "deprecation_date": "2025-12-09", + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 2.1e-6, + "input_cost_per_image": 2.1e-6, + "input_cost_per_token": 3.5e-7, + "input_cost_per_video_per_second": 2.1e-6, + "litellm_provider": "gemini", + 
"max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 8.5e-6, + "output_cost_per_token": 1.5e-6, + "rpm": 10, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.0-flash-preview-image-generation": { + "deprecation_date": "2025-11-14", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "rpm": 10000, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + 
"supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.0-flash-thinking-exp": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini/gemini-2.0-flash-thinking-exp-01-21": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 
0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini/gemini-2.0-pro-exp-02-05": { + "cache_read_input_token_cost": 0.0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + 
"mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 2, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 1000000 + }, + "gemini/gemini-2.5-flash": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + 
"input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "supports_reasoning": false, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-5, + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini/gemini-2.5-flash-image-preview": { + "deprecation_date": "2026-01-15", + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-5, + "output_cost_per_reasoning_token": 3e-5, + "output_cost_per_token": 3e-5, + "rpm": 100000, + "source": 
"https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "gemini", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-5, + "rpm": 1000, + "tpm": 4000000, + "output_cost_per_token_batches": 6e-6, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini/gemini-2.5-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + 
"max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-lite-preview-09-2025": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 3e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-preview-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-flash-latest": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": 
"chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-flash-lite-latest": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 3e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + 
"supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-preview-04-17": { + "cache_read_input_token_cost": 3.75e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + 
"output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_token": 6e-7, + "rpm": 10, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-preview-05-20": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 10, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-flash-preview-tts": { + 
"cache_read_input_token_cost": 3.75e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_token": 6e-7, + "rpm": 10, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-pro": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + 
"supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini/gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/computer-use", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 800000 + }, + "gemini/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + 
"/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-6, + "output_cost_per_token": 3e-6, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 
1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-6, + "output_cost_per_token": 3e-6, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini/gemini-2.5-pro-exp-03-25": { + "cache_read_input_token_cost": 0.0, + "input_cost_per_token": 0.0, + "input_cost_per_token_above_200k_tokens": 0.0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.0, + "output_cost_per_token_above_200k_tokens": 0.0, + "rpm": 5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": 
true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-pro-preview-03-25": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.5-pro-preview-05-06": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + 
"output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.5-pro-preview-06-05": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-2.5-pro-preview-tts": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + 
"input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_above_200k_tokens": 2.5e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_above_200k_tokens": 1.5e-5, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000 + }, + "gemini/gemini-exp-1114": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. 
Assuming same as gemini-1.5-pro.", + "supports_tool_choice": true + }, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-exp-1206": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1206. Assuming same as gemini-1.5-pro.", + "supports_tool_choice": true + }, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-gemma-2-27b-it": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "gemini", + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/gemini-gemma-2-9b-it": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "gemini", + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-6, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/gemini-pro": { + "input_cost_per_token": 3.5e-7, + "input_cost_per_token_above_128k_tokens": 7e-7, + "litellm_provider": "gemini", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-6, + "output_cost_per_token_above_128k_tokens": 2.1e-6, + "rpd": 30000, + "rpm": 360, + "source": "https://ai.google.dev/gemini-api/docs/models/gemini", + "supports_function_calling": true, + "supports_tool_choice": true, + "tpm": 120000 + }, + "gemini/gemini-pro-vision": { + "input_cost_per_token": 3.5e-7, + "input_cost_per_token_above_128k_tokens": 7e-7, + "litellm_provider": "gemini", + "max_input_tokens": 30720, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.05e-6, + "output_cost_per_token_above_128k_tokens": 2.1e-6, + "rpd": 30000, + "rpm": 360, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 120000 + }, + "gemini/gemma-3-27b-it": { + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + 
"output_cost_per_token_above_128k_tokens": 0, + "source": "https://aistudio.google.com", + "supports_audio_output": false, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/imagen-3.0-fast-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-3.0-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-3.0-generate-002": { + "deprecation_date": "2025-11-10", + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-fast-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-ultra-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/learnlm-1.5-pro-experimental": { + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + 
"input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_input_tokens": 32767, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://aistudio.google.com", + "supports_audio_output": false, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/veo-2.0-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.0-fast-generate-preview": { + "deprecation_date": "2025-11-12", + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.0-generate-preview": { + "deprecation_date": "2025-11-12", + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.75, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.1-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + 
"supported_output_modalities": ["video"] + }, + "gemini/veo-3.1-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.1-fast-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.1-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "github_copilot/claude-haiku-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-opus-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-opus-41": { + "litellm_provider": "github_copilot", + "max_input_tokens": 80000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_vision": true + }, + 
"github_copilot/claude-sonnet-4": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-sonnet-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gemini-2.5-pro": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gemini-3-pro-preview": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-3.5-turbo": { + "litellm_provider": "github_copilot", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-3.5-turbo-0613": { + "litellm_provider": "github_copilot", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4": { + "litellm_provider": "github_copilot", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4-0613": { + "litellm_provider": "github_copilot", + 
"max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4-o-preview": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-4.1": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-4.1-2025-04-14": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-41-copilot": { + "litellm_provider": "github_copilot", + "mode": "completion" + }, + "github_copilot/gpt-4o": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-2024-05-13": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-2024-08-06": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + 
"github_copilot/gpt-4o-2024-11-20": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-mini": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-4o-mini-2024-07-18": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5-mini": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.1": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.1-codex-max": { + "litellm_provider": 
"github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": ["/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.2": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/text-embedding-3-small": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "github_copilot/text-embedding-3-small-inference": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "github_copilot/text-embedding-ada-002": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "chatgpt/gpt-5.2-codex": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": ["/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.2": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.1-codex-max": { + 
"litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": ["/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.1-codex-mini": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": ["/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "gigachat/GigaChat-2-Lite": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true + }, + "gigachat/GigaChat-2-Max": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true + }, + "gigachat/GigaChat-2-Pro": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true + }, + "gigachat/Embeddings": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "gigachat/Embeddings-2": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + 
"max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "gigachat/EmbeddingsGigaR": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "google.gemma-3-12b-it": { + "input_cost_per_token": 9e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.9e-7, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-27b-it": { + "input_cost_per_token": 2.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.8e-7, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-4b-it": { + "input_cost_per_token": 4e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-8, + "supports_system_messages": true, + "supports_vision": true + }, + "google_pse/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "google_pse", + "mode": "search" + }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + 
"search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "global.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": 
"https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "gpt-3.5-turbo": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-0125": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-0301": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_prompt_caching": true, + 
"supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-0613": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-1106": { + "deprecation_date": "2026-09-28", + "input_cost_per_token": 1e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-16k": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-16k-0613": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-instruct": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-6 + }, + "gpt-3.5-turbo-instruct-0914": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 8192, + "max_output_tokens": 4097, + "max_tokens": 4097, + "mode": 
"completion", + "output_cost_per_token": 2e-6 + }, + "gpt-4": { + "input_cost_per_token": 3e-5, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0125-preview": { + "deprecation_date": "2026-03-26", + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0314": { + "input_cost_per_token": 3e-5, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0613": { + "deprecation_date": "2025-06-06", + "input_cost_per_token": 3e-5, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-1106-preview": { + "deprecation_date": "2026-03-26", + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": 
true + }, + "gpt-4-1106-vision-preview": { + "deprecation_date": "2024-12-06", + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4-32k": { + "input_cost_per_token": 6e-5, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-32k-0314": { + "input_cost_per_token": 6e-5, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-32k-0613": { + "input_cost_per_token": 6e-5, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-turbo": { + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4-turbo-2024-04-09": { + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 
128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4-turbo-preview": { + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-vision-preview": { + "deprecation_date": "2024-12-06", + "input_cost_per_token": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4.1": { + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_priority": 8.75e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "output_cost_per_token_priority": 1.4e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-7, + "cache_read_input_token_cost_priority": 1.75e-7, + "input_cost_per_token": 4e-7, + "input_cost_per_token_batches": 2e-7, + "input_cost_per_token_priority": 7e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "output_cost_per_token_batches": 8e-7, + "output_cost_per_token_priority": 2.8e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "input_cost_per_token_batches": 2e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "output_cost_per_token_batches": 8e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_priority": 5e-8, + "input_cost_per_token": 1e-7, + "input_cost_per_token_batches": 5e-8, + "input_cost_per_token_priority": 2e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_batches": 2e-7, + "output_cost_per_token_priority": 8e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + 
"supports_vision": true + }, + "gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1e-7, + "input_cost_per_token_batches": 5e-8, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_batches": 2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.5-preview": { + "cache_read_input_token_cost": 3.75e-5, + "input_cost_per_token": 7.5e-5, + "input_cost_per_token_batches": 3.75e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00015, + "output_cost_per_token_batches": 7.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4.5-preview-2025-02-27": { + "cache_read_input_token_cost": 3.75e-5, + "deprecation_date": "2025-07-14", + "input_cost_per_token": 7.5e-5, + "input_cost_per_token_batches": 3.75e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00015, + "output_cost_per_token_batches": 7.5e-5, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o": { + "cache_read_input_token_cost": 1.25e-6, + "cache_read_input_token_cost_priority": 2.125e-6, + "input_cost_per_token": 2.5e-6, + "input_cost_per_token_batches": 1.25e-6, + "input_cost_per_token_priority": 4.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_batches": 5e-6, + "output_cost_per_token_priority": 1.7e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-6, + "input_cost_per_token_batches": 2.5e-6, + "input_cost_per_token_priority": 8.75e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token_priority": 2.625e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "input_cost_per_token_batches": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + 
"output_cost_per_token_batches": 5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-2024-11-20": { + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "input_cost_per_token_batches": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_batches": 5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-audio-preview": { + "input_cost_per_audio_token": 0.0001, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.0002, + "output_cost_per_token": 1e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-audio-preview-2024-10-01": { + "input_cost_per_audio_token": 0.0001, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.0002, + "output_cost_per_token": 1e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-audio-preview-2025-06-03": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost_priority": 1.25e-7, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_batches": 7.5e-8, + "input_cost_per_token_priority": 2.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "output_cost_per_token_batches": 3e-7, + "output_cost_per_token_priority": 1e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + 
"gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_batches": 7.5e-8, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "output_cost_per_token_batches": 3e-7, + "search_context_cost_per_query": { + "search_context_size_high": 0.03, + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-mini-audio-preview": { + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 6e-7, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 6e-7, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-realtime-preview": { + "cache_creation_input_audio_token_cost": 3e-7, + 
"cache_read_input_token_cost": 3e-7, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-search-preview": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_batches": 7.5e-8, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "output_cost_per_token_batches": 3e-7, + "search_context_cost_per_query": { + "search_context_size_high": 0.03, + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + 
"gpt-4o-mini-search-preview-2025-03-11": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_batches": 7.5e-8, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "output_cost_per_token_batches": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-mini-transcribe": { + "input_cost_per_audio_token": 3e-6, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-6, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "gpt-4o-mini-tts": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] + }, + "gpt-4o-realtime-preview": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2e-5, + 
"cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 0.0001, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.0002, + "output_cost_per_token": 2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-realtime-preview-2025-06-03": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2e-5, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-search-preview": { + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "input_cost_per_token_batches": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 
1e-5, + "output_cost_per_token_batches": 5e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.05, + "search_context_size_low": 0.03, + "search_context_size_medium": 0.035 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4o-search-preview-2025-03-11": { + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "input_cost_per_token_batches": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_batches": 5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-transcribe": { + "input_cost_per_audio_token": 6e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "gpt-image-1.5": { + "cache_read_input_image_token_cost": 2e-6, + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-5, + "input_cost_per_image_token": 8e-6, + "output_cost_per_image_token": 3.2e-5, + "supported_endpoints": ["/v1/images/generations"], + "supports_vision": true, + "supports_pdf_input": true + }, + "gpt-image-1.5-2025-12-16": { + "cache_read_input_image_token_cost": 2e-6, + 
"cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-5, + "input_cost_per_image_token": 8e-6, + "output_cost_per_image_token": 3.2e-5, + "supported_endpoints": ["/v1/images/generations"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.034, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1024/gpt-image-1.5": 
{ + "input_cost_per_image": 0.133, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": 
["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.034, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": 
true, + "supports_pdf_input": true + }, + "high/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.133, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + 
"1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"], + "supports_vision": true, + "supports_pdf_input": true + }, + "gpt-5": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_flex": 6.25e-8, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_flex": 6.25e-7, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_flex": 5e-6, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + 
"output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.1-chat-latest": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", 
"/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "gpt-5.2": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.2-2025-12-11": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.2-chat-latest": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.2-pro": { + "input_cost_per_token": 2.1e-5, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5.2-pro-2025-12-11": { + "input_cost_per_token": 2.1e-5, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-pro": { + "input_cost_per_token": 1.5e-5, + "input_cost_per_token_batches": 7.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 6e-5, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-pro-2025-10-06": { + "input_cost_per_token": 1.5e-5, + "input_cost_per_token_batches": 7.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + 
"output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 6e-5, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_flex": 6.25e-8, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_flex": 6.25e-7, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_flex": 5e-6, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "gpt-5-chat-latest": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + 
"max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "output_cost_per_token_priority": 3.6e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 1.75e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.4e-5, + "output_cost_per_token_priority": 2.8e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_flex": 1.25e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_flex": 1.25e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "output_cost_per_token_flex": 1e-6, + 
"output_cost_per_token_priority": 3.6e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_flex": 1.25e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_flex": 1.25e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "output_cost_per_token_flex": 1e-6, + "output_cost_per_token_priority": 3.6e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5-nano": { + "cache_read_input_token_cost": 5e-9, + "cache_read_input_token_cost_flex": 2.5e-9, + "input_cost_per_token": 5e-8, + "input_cost_per_token_flex": 2.5e-8, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 272000, + 
"max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_flex": 2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5e-9, + "cache_read_input_token_cost_flex": 2.5e-9, + "input_cost_per_token": 5e-8, + "input_cost_per_token_flex": 2.5e-8, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "output_cost_per_token_flex": 2e-7, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-image-1": { + "cache_read_input_image_token_cost": 2.5e-6, + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_image_token": 1e-5, + "input_cost_per_token": 5e-6, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 4e-5, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "gpt-image-1-mini": { + 
"cache_read_input_image_token_cost": 2.5e-7, + "cache_read_input_token_cost": 2e-7, + "input_cost_per_image_token": 2.5e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 8e-6, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "gpt-realtime": { + "cache_creation_input_audio_token_cost": 4e-7, + "cache_read_input_token_cost": 4e-7, + "input_cost_per_audio_token": 3.2e-5, + "input_cost_per_image": 5e-6, + "input_cost_per_token": 4e-6, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-5, + "output_cost_per_token": 1.6e-5, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-7, + "cache_read_input_token_cost": 
4e-7, + "input_cost_per_audio_token": 3.2e-5, + "input_cost_per_image": 5e-6, + "input_cost_per_token": 4e-6, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-5, + "output_cost_per_token": 1.6e-5, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gradient_ai/alibaba-qwen3-32b": { + "litellm_provider": "gradient_ai", + "max_tokens": 2048, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3-opus": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3.5-haiku": { + "input_cost_per_token": 8e-7, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3.5-sonnet": { + "input_cost_per_token": 3e-6, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3.7-sonnet": { + "input_cost_per_token": 3e-6, + "litellm_provider": "gradient_ai", + "max_tokens": 
1024, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/deepseek-r1-distill-llama-70b": { + "input_cost_per_token": 9.9e-7, + "litellm_provider": "gradient_ai", + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 9.9e-7, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/llama3-8b-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "gradient_ai", + "max_tokens": 512, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/llama3.3-70b-instruct": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "gradient_ai", + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.5e-7, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/mistral-nemo-instruct-2407": { + "input_cost_per_token": 3e-7, + "litellm_provider": "gradient_ai", + "max_tokens": 512, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/openai-gpt-4o": { + "litellm_provider": "gradient_ai", + "max_tokens": 16384, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/openai-gpt-4o-mini": { + "litellm_provider": "gradient_ai", + "max_tokens": 16384, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/openai-o3": { + "input_cost_per_token": 2e-6, + "litellm_provider": "gradient_ai", 
+ "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "gradient_ai/openai-o3-mini": { + "input_cost_per_token": 1.1e-6, + "litellm_provider": "gradient_ai", + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], + "supports_tool_choice": false + }, + "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-20b-mxfp4-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-120b-mxfp4-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Gemma-3-4b-it-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Qwen3-4B-Instruct-2507-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + 
"max_tokens": 32768, + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "amazon-nova/nova-micro-v1": { + "input_cost_per_token": 3.5e-8, + "litellm_provider": "amazon_nova", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-7, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "amazon-nova/nova-lite-v1": { + "input_cost_per_token": 6e-8, + "litellm_provider": "amazon_nova", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-7, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon-nova/nova-premier-v1": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "amazon_nova", + "max_input_tokens": 1000000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.25e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": false, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon-nova/nova-pro-v1": { + "input_cost_per_token": 8e-7, + "litellm_provider": "amazon_nova", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "groq/llama-3.1-8b-instant": { + "input_cost_per_token": 5e-8, + "litellm_provider": "groq", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + 
"mode": "chat", + "output_cost_per_token": 8e-8, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/llama-3.3-70b-versatile": { + "input_cost_per_token": 5.9e-7, + "litellm_provider": "groq", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7.9e-7, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/gemma-7b-it": { + "input_cost_per_token": 5e-8, + "litellm_provider": "groq", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-8, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/meta-llama/llama-guard-4-12b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "groq", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-7 + }, + "groq/meta-llama/llama-4-maverick-17b-128e-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "groq/meta-llama/llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 1.1e-7, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.4e-7, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "groq/moonshotai/kimi-k2-instruct-0905": { + "input_cost_per_token": 1e-6, + "output_cost_per_token": 3e-6, + "cache_read_input_token_cost": 5e-7, + 
"litellm_provider": "groq", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "groq/openai/gpt-oss-120b": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 32766, + "max_tokens": 32766, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/openai/gpt-oss-20b": { + "cache_read_input_token_cost": 3.75e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/playai-tts": { + "input_cost_per_character": 5e-5, + "litellm_provider": "groq", + "max_input_tokens": 10000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "audio_speech" + }, + "groq/qwen/qwen3-32b": { + "input_cost_per_token": 2.9e-7, + "litellm_provider": "groq", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 5.9e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/whisper-large-v3": { + "input_cost_per_second": 3.083e-5, + "litellm_provider": "groq", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "groq/whisper-large-v3-turbo": { + 
"input_cost_per_second": 1.111e-5, + "litellm_provider": "groq", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 7.629e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 6.539e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 6.539e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "heroku/claude-3-5-haiku": { + "litellm_provider": "heroku", + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "heroku/claude-3-5-sonnet-latest": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "heroku/claude-3-7-sonnet": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "heroku/claude-4-sonnet": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "high/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 0.167, + "input_cost_per_pixel": 1.59263611e-7, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "high/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.25, + "input_cost_per_pixel": 1.58945719e-7, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": 
["/v1/images/generations"] + }, + "high/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.25, + "input_cost_per_pixel": 1.58945719e-7, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/QwQ-32B": { + "input_cost_per_token": 2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/Qwen2.5-72B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/Qwen3-235B-A22B": { + "input_cost_per_token": 2e-6, + "litellm_provider": 
"hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-R1": { + "input_cost_per_token": 4e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-R1-0528": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-V3": { + "input_cost_per_token": 2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 4e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Llama-3.2-3B-Instruct": { + "input_cost_per_token": 1.2e-7, 
+ "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + 
"hyperbolic/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/moonshotai/Kimi-K2-Instruct": { + "input_cost_per_token": 2e-6, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "j2-light": { + "input_cost_per_token": 3e-6, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 3e-6 + }, + "j2-mid": { + "input_cost_per_token": 1e-5, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 1e-5 + }, + "j2-ultra": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 1.5e-5 + }, + "jamba-1.5": { + "input_cost_per_token": 2e-7, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "jamba-1.5-large": { + "input_cost_per_token": 2e-6, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + 
"jamba-1.5-large@001": { + "input_cost_per_token": 2e-6, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + "jamba-1.5-mini": { + "input_cost_per_token": 2e-7, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "jamba-1.5-mini@001": { + "input_cost_per_token": 2e-7, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "jamba-large-1.6": { + "input_cost_per_token": 2e-6, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + "jamba-large-1.7": { + "input_cost_per_token": 2e-6, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + "jamba-mini-1.6": { + "input_cost_per_token": 2e-7, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "jamba-mini-1.7": { + "input_cost_per_token": 2e-7, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "jina-reranker-v2-base-multilingual": { + "input_cost_per_token": 1.8e-8, + "litellm_provider": "jina_ai", + "max_document_chunks_per_query": 2048, + "max_input_tokens": 1024, + 
"max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "rerank", + "output_cost_per_token": 1.8e-8 + }, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-6, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 3.3e-6, + "input_cost_per_token_above_200k_tokens": 6.6e-6, + "output_cost_per_token_above_200k_tokens": 2.475e-5, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-6, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + 
"lambda_ai/deepseek-llama3.3-70b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-r1-0528": { + "input_cost_per_token": 2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-r1-671b": { + "input_cost_per_token": 8e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-v3-0324": { + "input_cost_per_token": 2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-405b": { + "input_cost_per_token": 8e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-70b": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-8b": { + "input_cost_per_token": 2.5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-8, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/lfm-40b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/lfm-7b": { + "input_cost_per_token": 2.5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-8, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama-4-maverick-17b-128e-instruct-fp8": { + "input_cost_per_token": 5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "lambda_ai/llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 16384, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-405b-instruct-fp8": { + "input_cost_per_token": 8e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-8b-instruct": { + "input_cost_per_token": 2.5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-8, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-nemotron-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.2-11b-vision-instruct": { + "input_cost_per_token": 1.5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-8, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "lambda_ai/llama3.2-3b-instruct": { + "input_cost_per_token": 1.5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-8, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.3-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/qwen25-coder-32b-instruct": { + "input_cost_per_token": 5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/qwen3-32b-fp8": { + "input_cost_per_token": 5e-8, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "low/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 0.011, + "input_cost_per_pixel": 1.0490417e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.016, + "input_cost_per_pixel": 1.0172526e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.016, + "input_cost_per_pixel": 1.0172526e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "luminous-base": { + "input_cost_per_token": 3e-5, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_token": 3.3e-5 + }, + "luminous-base-control": { + "input_cost_per_token": 3.75e-5, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 4.125e-5 + }, + "luminous-extended": { + "input_cost_per_token": 4.5e-5, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_token": 4.95e-5 + }, + "luminous-extended-control": { + "input_cost_per_token": 5.625e-5, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.1875e-5 + }, + "luminous-supreme": { + "input_cost_per_token": 0.000175, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_token": 0.0001925 + }, + "luminous-supreme-control": { + "input_cost_per_token": 0.00021875, + "litellm_provider": "aleph_alpha", + "max_tokens": 2048, + "mode": "chat", + 
"output_cost_per_token": 0.000240625 + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.036 + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.072 + }, + "medium/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 0.042, + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.063, + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.063, + "input_cost_per_pixel": 4.0054321e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.005, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.011, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": 
["/v1/images/generations"] + }, + "medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medlm-large": { + "input_cost_per_character": 5e-6, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 1.5e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "medlm-medium": { + "input_cost_per_character": 5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "meta.llama2-13b-chat-v1": { + "input_cost_per_token": 7.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "meta.llama2-70b-chat-v1": { + "input_cost_per_token": 1.95e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.56e-6 + }, + "meta.llama3-1-405b-instruct-v1:0": { + "input_cost_per_token": 5.32e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-5, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-70b-instruct-v1:0": 
{ + "input_cost_per_token": 9.9e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 9.9e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-8b-instruct-v1:0": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.2e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-11b-instruct-v1:0": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.5e-7, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-90b-instruct-v1:0": { + "input_cost_per_token": 2e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "meta.llama3-3-70b-instruct-v1:0": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": 
"bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-6 + }, + "meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "meta.llama4-maverick-17b-instruct-v1:0": { + "input_cost_per_token": 2.4e-7, + "input_cost_per_token_batches": 1.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9.7e-7, + "output_cost_per_token_batches": 4.85e-7, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama4-scout-17b-instruct-v1:0": { + "input_cost_per_token": 1.7e-7, + "input_cost_per_token_batches": 8.5e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "output_cost_per_token_batches": 3.3e-7, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta_llama/Llama-3.3-70B-Instruct": { + "litellm_provider": "meta_llama", + "max_input_tokens": 128000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": ["text"], + 
"supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-3.3-8B-Instruct": { + "litellm_provider": "meta_llama", + "max_input_tokens": 128000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "litellm_provider": "meta_llama", + "max_input_tokens": 1000000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-4-Scout-17B-16E-Instruct-FP8": { + "litellm_provider": "meta_llama", + "max_input_tokens": 10000000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "minimax.minimax-m2": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "supports_system_messages": true + }, + "minimax/speech-02-hd": { + "input_cost_per_character": 0.0001, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "minimax/speech-02-turbo": { + "input_cost_per_character": 6e-5, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "minimax/speech-2.6-hd": { + 
"input_cost_per_character": 0.0001, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "minimax/speech-2.6-turbo": { + "input_cost_per_character": 6e-5, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "minimax/MiniMax-M2.1": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.1-lightning": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 2.4e-6, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "mistral.magistral-small-2509": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true + }, + 
"mistral.ministral-3-14b-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_function_calling": true, + "supports_system_messages": true + }, + "mistral.ministral-3-3b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_system_messages": true + }, + "mistral.ministral-3-8b-instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_system_messages": true + }, + "mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_function_calling": true + }, + "mistral.mistral-large-2407-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 9e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "mistral.mistral-large-3-675b-instruct": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_system_messages": true + }, + "mistral.mistral-small-2402-v1:0": { + "input_cost_per_token": 1e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_function_calling": true + }, + "mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_tool_choice": true + }, + "mistral.voxtral-mini-3b-2507": { + "input_cost_per_token": 4e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-8, + "supports_audio_input": true, + "supports_system_messages": true + }, + "mistral.voxtral-small-24b-2507": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_audio_input": true, + "supports_system_messages": true + }, + "mistral/codestral-2405": { + "input_cost_per_token": 1e-6, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-2508": { + "input_cost_per_token": 3e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "source": "https://mistral.ai/news/codestral-25-08", + 
"supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-latest": { + "input_cost_per_token": 1e-6, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-mamba-latest": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "mistral/devstral-medium-2507": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://mistral.ai/news/devstral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-small-2505": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://mistral.ai/news/devstral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-small-2507": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://mistral.ai/news/devstral", + 
"supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/labs-devstral-small-2512": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://docs.mistral.ai/models/devstral-small-2-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-2512": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-medium-2506": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-medium-2509": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-ocr-latest": { + 
"litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://mistral.ai/pricing#api-pricing" + }, + "mistral/mistral-ocr-2505-completion": { + "litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://mistral.ai/pricing#api-pricing" + }, + "mistral/magistral-medium-latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-small-2506": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-small-latest": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-embed": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + 
"max_tokens": 8192, + "mode": "embedding" + }, + "mistral/codestral-embed": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/codestral-embed-2505": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/mistral-large-2402": { + "input_cost_per_token": 4e-6, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-2407": { + "input_cost_per_token": 3e-6, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-2411": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-3": { + "input_cost_per_token": 5e-7, + 
"litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-medium": { + "input_cost_per_token": 2.7e-6, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.1e-6, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-2312": { + "input_cost_per_token": 2.7e-6, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.1e-6, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-2505": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-latest": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-small": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 
8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-small-latest": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-tiny": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-codestral-mamba": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "mistral/open-mistral-7b": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mistral-nemo": { + "input_cost_per_token": 3e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_response_schema": true, 
+ "supports_tool_choice": true + }, + "mistral/open-mistral-nemo-2407": { + "input_cost_per_token": 3e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mixtral-8x22b": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 65336, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mixtral-8x7b": { + "input_cost_per_token": 7e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/pixtral-12b-2409": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/pixtral-large-2411": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true 
+ }, + "mistral/pixtral-large-latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "supports_reasoning": true, + "supports_system_messages": true + }, + "moonshot/kimi-k2-0711-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-0905-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-turbo-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 1.15e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 8e-6, + "source": 
"https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-latest": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-128k": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-32k": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-8k": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 2e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-thinking-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 
6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_vision": true + }, + "moonshot/kimi-k2-thinking": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-thinking-turbo": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 1.15e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 8e-6, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/moonshot-v1-128k": { + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-128k-0430": { + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"moonshot/moonshot-v1-128k-vision-preview": { + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-32k": { + "input_cost_per_token": 1e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-32k-0430": { + "input_cost_per_token": 1e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-32k-vision-preview": { + "input_cost_per_token": 1e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-8k": { + "input_cost_per_token": 2e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-8k-0430": { + "input_cost_per_token": 2e-7, + "litellm_provider": "moonshot", + 
"max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-8k-vision-preview": { + "input_cost_per_token": 2e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-auto": { + "input_cost_per_token": 2e-6, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "morph/morph-v3-fast": { + "input_cost_per_token": 8e-7, + "litellm_provider": "morph", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": false + }, + "morph/morph-v3-large": { + "input_cost_per_token": 9e-7, + "litellm_provider": "morph", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "output_cost_per_token": 1.9e-6, + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": false + }, + "multimodalembedding": { + "input_cost_per_character": 2e-7, + "input_cost_per_image": 0.0001, + "input_cost_per_token": 8e-7, + 
"input_cost_per_video_per_second": 0.0005, + "input_cost_per_video_per_second_above_15s_interval": 0.002, + "input_cost_per_video_per_second_above_8s_interval": 0.001, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] + }, + "multimodalembedding@001": { + "input_cost_per_character": 2e-7, + "input_cost_per_image": 0.0001, + "input_cost_per_token": 8e-7, + "input_cost_per_video_per_second": 0.0005, + "input_cost_per_video_per_second_above_15s_interval": 0.002, + "input_cost_per_video_per_second_above_8s_interval": 0.001, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] + }, + "nscale/Qwen/QwQ-32B": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 6e-8, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-3B-Instruct": { + "input_cost_per_token": 1e-8, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 3e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + 
"nscale/Qwen/Qwen2.5-Coder-7B-Instruct": { + "input_cost_per_token": 1e-8, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 3e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/black-forest-labs/FLUX.1-schnell": { + "input_cost_per_pixel": 1.3e-9, + "litellm_provider": "nscale", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", + "supported_endpoints": ["/v1/images/generations"] + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 3.75e-7, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.75/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 3.75e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "input_cost_per_token": 2.5e-8, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.05/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 2.5e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { + "input_cost_per_token": 9e-8, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.18/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 9e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { + "input_cost_per_token": 7e-8, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.14/1M tokens total. Assumed 50/50 split for input/output." 
+ }, + "mode": "chat", + "output_cost_per_token": 7e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.30/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { + "input_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-8, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.06/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 3e-8, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." 
+ }, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 9e-8, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2.9e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { + "input_cost_per_token": 6e-7, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $1.20/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/stabilityai/stable-diffusion-xl-base-1.0": { + "input_cost_per_pixel": 3e-9, + "litellm_provider": "nscale", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", + "supported_endpoints": ["/v1/images/generations"] + }, + "nvidia.nemotron-nano-12b-v2": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_system_messages": true, + "supports_vision": true + }, + "nvidia.nemotron-nano-9b-v2": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.3e-7, + "supports_system_messages": true + }, + "o1": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-5, + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-2024-12-17": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-mini": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_vision": true + }, + "o1-mini-2024-09-12": { + "deprecation_date": "2025-10-27", + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 3e-6, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": true + }, + "o1-preview": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_vision": true + }, + "o1-preview-2024-09-12": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": true + }, + "o1-pro": { + "input_cost_per_token": 0.00015, + "input_cost_per_token_batches": 7.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 0.0006, + "output_cost_per_token_batches": 0.0003, + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-pro-2025-03-19": { + "input_cost_per_token": 0.00015, + "input_cost_per_token_batches": 7.5e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 0.0006, + "output_cost_per_token_batches": 0.0003, + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o3": { + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_flex": 2.5e-7, + "cache_read_input_token_cost_priority": 8.75e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_flex": 1e-6, + "input_cost_per_token_priority": 3.5e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "output_cost_per_token_flex": 4e-6, + "output_cost_per_token_priority": 1.4e-5, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "o3-2025-04-16": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "o3-deep-research": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_token": 1e-5, + 
"input_cost_per_token_batches": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-5, + "output_cost_per_token_batches": 2e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o3-deep-research-2025-06-26": { + "cache_read_input_token_cost": 2.5e-6, + "input_cost_per_token": 1e-5, + "input_cost_per_token_batches": 5e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-5, + "output_cost_per_token_batches": 2e-5, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o3-mini": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "o3-mini-2025-01-31": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "o3-pro": { + "input_cost_per_token": 2e-5, + "input_cost_per_token_batches": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-5, + "output_cost_per_token_batches": 4e-5, + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o3-pro-2025-06-10": { + "input_cost_per_token": 2e-5, + "input_cost_per_token_batches": 1e-5, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-5, + "output_cost_per_token_batches": 4e-5, + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o4-mini": { + "cache_read_input_token_cost": 2.75e-7, + "cache_read_input_token_cost_flex": 1.375e-7, + "cache_read_input_token_cost_priority": 5e-7, + "input_cost_per_token": 1.1e-6, + "input_cost_per_token_flex": 5.5e-7, + "input_cost_per_token_priority": 2e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "output_cost_per_token_flex": 2.2e-6, + "output_cost_per_token_priority": 8e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "o4-mini-2025-04-16": { + "cache_read_input_token_cost": 2.75e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "o4-mini-deep-research": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", 
"image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o4-mini-deep-research-2025-06-26": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-6, + "output_cost_per_token_batches": 4e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "oci/meta.llama-3.1-405b-instruct": { + "input_cost_per_token": 1.068e-5, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.068e-5, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-3.2-90b-vision-instruct": { + "input_cost_per_token": 2e-6, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-3.3-70b-instruct": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 512000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 192000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3": { + "input_cost_per_token": 3e-6, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-fast": { + "input_cost_per_token": 5e-6, + "litellm_provider": "oci", + "max_input_tokens": 131072, + 
"max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-mini": { + "input_cost_per_token": 3e-7, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-mini-fast": { + "input_cost_per_token": 6e-7, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-6, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4": { + "input_cost_per_token": 3e-6, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-latest": { + "input_cost_per_token": 1.56e-6, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-6, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 1.56e-6, + "litellm_provider": "oci", + 
"max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-6, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 1.56e-6, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-6, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "ollama/codegeex4": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": false + }, + "ollama/codegemma": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/codellama": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/deepseek-coder-v2-base": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-base": { + "input_cost_per_token": 
0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-v3.1:671b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/gpt-oss:120b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/gpt-oss:20b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/internlm2_5-20b-chat": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/llama2": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2-uncensored": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + 
"output_cost_per_token": 0.0 + }, + "ollama/llama2:13b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2:70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2:7b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/llama3:70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3:8b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/mistral": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-7B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 
8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-7B-Instruct-v0.2": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-large-instruct-2407": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/orca-mini": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/qwen3-coder:480b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/vicuna": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + 
"omni-moderation-2024-09-26": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "omni-moderation-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "omni-moderation-latest-intents": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "openai.gpt-oss-120b-1:0": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openai.gpt-oss-20b-1:0": { + "input_cost_per_token": 7e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openai.gpt-oss-safeguard-120b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_system_messages": true + }, + "openai.gpt-oss-safeguard-20b": { + "input_cost_per_token": 7e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-7, + 
"supports_system_messages": true + }, + "openrouter/anthropic/claude-2": { + "input_cost_per_token": 1.102e-5, + "litellm_provider": "openrouter", + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3.268e-5, + "supports_tool_choice": true + }, + "openrouter/anthropic/claude-3-5-haiku": { + "input_cost_per_token": 1e-6, + "litellm_provider": "openrouter", + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "openrouter/anthropic/claude-3-5-haiku-20241022": { + "input_cost_per_token": 1e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_function_calling": true, + "supports_tool_choice": true, + "tool_use_system_prompt_tokens": 264 + }, + "openrouter/anthropic/claude-3-haiku": { + "input_cost_per_image": 0.0004, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "openrouter", + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/anthropic/claude-3-haiku-20240307": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264 + }, + "openrouter/anthropic/claude-3-opus": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + 
"tool_use_system_prompt_tokens": 395 + }, + "openrouter/anthropic/claude-3-sonnet": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/anthropic/claude-3.5-sonnet": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-3.5-sonnet:beta": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-3.7-sonnet": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-3.7-sonnet:beta": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + 
"max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-instant-v1": { + "input_cost_per_token": 1.63e-6, + "litellm_provider": "openrouter", + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 5.51e-6, + "supports_tool_choice": true + }, + "openrouter/anthropic/claude-opus-4": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.1": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 1.875e-5, + "cache_creation_input_token_cost_above_1hr": 3e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-sonnet-4": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 
3.75e-6, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-sonnet-4.5": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "openrouter/bytedance/ui-tars-1.5-7b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://openrouter.ai/api/v1/models/bytedance/ui-tars-1.5-7b", + "supports_tool_choice": true + }, + "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_tokens": 32769, + "mode": "chat", + "output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "openrouter/cohere/command-r-plus": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_tool_choice": true + }, + "openrouter/databricks/dbrx-instruct": { + "input_cost_per_token": 6e-7, + "litellm_provider": "openrouter", + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat": { + "input_cost_per_token": 1.4e-7, + "litellm_provider": "openrouter", + 
"max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-7, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat-v3-0324": { + "input_cost_per_token": 1.4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-7, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat-v3.1": { + "input_cost_per_token": 2e-7, + "input_cost_per_token_cache_hit": 2e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 8e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-v3.2-exp": { + "input_cost_per_token": 2e-7, + "input_cost_per_token_cache_hit": 2e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": false, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-coder": { + "input_cost_per_token": 
1.4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 66000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-7, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-r1-0528": { + "input_cost_per_token": 5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.15e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/fireworks/firellava-13b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "openrouter/google/gemini-2.0-flash-001": { + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, 
+ "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-2.5-flash": { + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 3e-7, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-2.5-pro": { + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + 
"max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "openrouter/google/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-6, + "output_cost_per_token": 3e-6, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + 
"openrouter/google/gemini-pro-1.5": { + "input_cost_per_image": 0.00265, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.5e-6, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-pro-vision": { + "input_cost_per_image": 0.0025, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "openrouter", + "max_tokens": 45875, + "mode": "chat", + "output_cost_per_token": 3.75e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/palm-2-chat-bison": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_tokens": 25804, + "mode": "chat", + "output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "openrouter/google/palm-2-codechat-bison": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_tokens": 20070, + "mode": "chat", + "output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "openrouter/gryphe/mythomax-l2-13b": { + "input_cost_per_token": 1.875e-6, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.875e-6, + "supports_tool_choice": true + }, + "openrouter/jondurbin/airoboros-l2-70b-2.1": { + "input_cost_per_token": 1.3875e-5, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.3875e-5, + "supports_tool_choice": true + }, + "openrouter/mancer/weaver": { + "input_cost_per_token": 5.625e-6, + "litellm_provider": "openrouter", + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 5.625e-6, + "supports_tool_choice": true + }, + "openrouter/meta-llama/codellama-34b-instruct": { + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-2-13b-chat": { + "input_cost_per_token": 2e-7, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-2-70b-chat": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-3-70b-instruct": { + "input_cost_per_token": 5.9e-7, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.9e-7, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-3-70b-instruct:nitro": { + "input_cost_per_token": 9e-7, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-3-8b-instruct:extended": { + "input_cost_per_token": 2.25e-7, + "litellm_provider": "openrouter", + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.25e-6, + "supports_tool_choice": true + }, + "openrouter/meta-llama/llama-3-8b-instruct:free": { + "input_cost_per_token": 0.0, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_tool_choice": true + }, + "openrouter/microsoft/wizardlm-2-8x22b:nitro": { + "input_cost_per_token": 1e-6, + "litellm_provider": "openrouter", + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1e-6, + "supports_tool_choice": true + }, + "openrouter/minimax/minimax-m2": { + "input_cost_per_token": 2.55e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 1.02e-6, + "supports_function_calling": true, + "supports_prompt_caching": 
false, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/mistralai/devstral-2512:free": { + "input_cost_per_image": 0, + "input_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/mistralai/devstral-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/mistralai/ministral-3b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/ministral-8b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/ministral-14b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 2e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + 
"output_cost_per_token": 2e-7, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/mistral-large-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/mistral-7b-instruct": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.3e-7, + "supports_tool_choice": true + }, + "openrouter/mistralai/mistral-7b-instruct:free": { + "input_cost_per_token": 0.0, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_tool_choice": true + }, + "openrouter/mistralai/mistral-large": { + "input_cost_per_token": 8e-6, + "litellm_provider": "openrouter", + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.4e-5, + "supports_tool_choice": true + }, + "openrouter/mistralai/mistral-small-3.1-24b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_tool_choice": true + }, + "openrouter/mistralai/mistral-small-3.2-24b-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_tool_choice": true + }, + "openrouter/mistralai/mixtral-8x22b-instruct": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "openrouter", + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 6.5e-7, + "supports_tool_choice": true + }, + 
"openrouter/nousresearch/nous-hermes-llama2-13b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-3.5-turbo": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "openrouter", + "max_tokens": 4095, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-3.5-turbo-16k": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_tokens": 16383, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-4": { + "input_cost_per_token": 3e-5, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-4-vision-preview": { + "input_cost_per_image": 0.01445, + "input_cost_per_token": 1e-5, + "litellm_provider": "openrouter", + "max_tokens": 130000, + "mode": "chat", + "output_cost_per_token": 3e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + 
"output_cost_per_token": 8e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 2.5e-8, + 
"input_cost_per_token": 1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 1.75e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-nano": { + "cache_read_input_token_cost": 5e-9, + "input_cost_per_token": 5e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5.2": { + "input_cost_per_image": 0, + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 1.75e-6, + 
"litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5.2-chat": { + "input_cost_per_image": 0, + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 1.75e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-5, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5.2-pro": { + "input_cost_per_image": 0, + "input_cost_per_token": 2.1e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000168, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-oss-120b": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-7, + "source": "https://openrouter.ai/openai/gpt-oss-120b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-oss-20b": { + "input_cost_per_token": 2e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": 
"https://openrouter.ai/openai/gpt-oss-20b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openrouter/openai/o1": { + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/o1-mini": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o1-mini-2024-09-12": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o1-preview": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o1-preview-2024-09-12": { + "input_cost_per_token": 
1.5e-5, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o3-mini": { + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o3-mini-high": { + "input_cost_per_token": 1.1e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/pygmalionai/mythalion-13b": { + "input_cost_per_token": 1.875e-6, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.875e-6, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen-2.5-coder-32b-instruct": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 33792, + "max_output_tokens": 33792, + "max_tokens": 33792, + "mode": "chat", + "output_cost_per_token": 1.8e-7, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen-vl-plus": { + "input_cost_per_token": 2.1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.3e-7, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"openrouter/qwen/qwen3-coder": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262100, + "max_output_tokens": 262100, + "max_tokens": 262100, + "mode": "chat", + "output_cost_per_token": 9.5e-7, + "source": "https://openrouter.ai/qwen/qwen3-coder", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "openrouter/switchpoint/router": { + "input_cost_per_token": 8.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3.4e-6, + "source": "https://openrouter.ai/switchpoint/router", + "supports_tool_choice": true + }, + "openrouter/undi95/remm-slerp-l2-13b": { + "input_cost_per_token": 1.875e-6, + "litellm_provider": "openrouter", + "max_tokens": 6144, + "mode": "chat", + "output_cost_per_token": 1.875e-6, + "supports_tool_choice": true + }, + "openrouter/x-ai/grok-4": { + "input_cost_per_token": 3e-6, + "litellm_provider": "openrouter", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://openrouter.ai/x-ai/grok-4", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "openrouter/x-ai/grok-4-fast:free": { + "input_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 2000000, + "max_output_tokens": 30000, + "max_tokens": 30000, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://openrouter.ai/x-ai/grok-4-fast:free", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": false + }, + "openrouter/z-ai/glm-4.6": { + "input_cost_per_token": 4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + 
"output_cost_per_token": 1.75e-6, + "source": "https://openrouter.ai/z-ai/glm-4.6", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/z-ai/glm-4.6:exacto": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1.9e-6, + "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 6.7e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Meta-Llama-3_1-70B-Instruct": { + "input_cost_per_token": 6.7e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct", + "supports_function_calling": false, + "supports_response_schema": false, + "supports_tool_choice": false + }, + "ovhcloud/Meta-Llama-3_3-70B-Instruct": { + 
"input_cost_per_token": 6.7e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-7B-Instruct-v0.3": { + "input_cost_per_token": 1e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 127000, + "max_output_tokens": 127000, + "max_tokens": 127000, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-Nemo-Instruct-2407": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 118000, + "max_output_tokens": 118000, + "max_tokens": 118000, + "mode": "chat", + "output_cost_per_token": 1.3e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": { + "input_cost_per_token": 9e-8, + "litellm_provider": "ovhcloud", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.8e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "ovhcloud/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 6.3e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 
6.3e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 8.7e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 8.7e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/Qwen2.5-VL-72B-Instruct": { + "input_cost_per_token": 9.1e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 9.1e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "ovhcloud/Qwen3-32B": { + "input_cost_per_token": 8e-8, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.3e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/gpt-oss-120b": { + "input_cost_per_token": 8e-8, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": false + }, + 
"ovhcloud/gpt-oss-20b": { + "input_cost_per_token": 4e-8, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/llava-v1.6-mistral-7b-hf": { + "input_cost_per_token": 2.9e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.9e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "ovhcloud/mamba-codestral-7B-v0.1": { + "input_cost_per_token": 1.9e-7, + "litellm_provider": "ovhcloud", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.9e-7, + "source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "palm/chat-bison": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/chat-bison-001": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + 
"palm/text-bison": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-001": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-off": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-recitation-off": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "parallel_ai/search": { + "input_cost_per_query": 0.004, + "litellm_provider": "parallel_ai", + "mode": "search" + }, + "parallel_ai/search-pro": { + "input_cost_per_query": 0.009, + "litellm_provider": "parallel_ai", + "mode": "search" + }, + "perplexity/codellama-34b-instruct": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-6 + }, + "perplexity/codellama-70b-instruct": { + "input_cost_per_token": 7e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + 
"max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.8e-6 + }, + "perplexity/llama-2-70b-chat": { + "input_cost_per_token": 7e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-6 + }, + "perplexity/llama-3.1-70b-instruct": { + "input_cost_per_token": 1e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "perplexity/llama-3.1-8b-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-7 + }, + "perplexity/llama-3.1-sonar-huge-128k-online": { + "deprecation_date": "2025-02-22", + "input_cost_per_token": 5e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "max_tokens": 127072, + "mode": "chat", + "output_cost_per_token": 5e-6 + }, + "perplexity/llama-3.1-sonar-large-128k-chat": { + "deprecation_date": "2025-02-22", + "input_cost_per_token": 1e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "perplexity/llama-3.1-sonar-large-128k-online": { + "deprecation_date": "2025-02-22", + "input_cost_per_token": 1e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "max_tokens": 127072, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "perplexity/llama-3.1-sonar-small-128k-chat": { + "deprecation_date": "2025-02-22", + "input_cost_per_token": 2e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + 
"output_cost_per_token": 2e-7 + }, + "perplexity/llama-3.1-sonar-small-128k-online": { + "deprecation_date": "2025-02-22", + "input_cost_per_token": 2e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "max_tokens": 127072, + "mode": "chat", + "output_cost_per_token": 2e-7 + }, + "perplexity/mistral-7b-instruct": { + "input_cost_per_token": 7e-8, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "perplexity/mixtral-8x7b-instruct": { + "input_cost_per_token": 7e-8, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "perplexity/pplx-70b-chat": { + "input_cost_per_token": 7e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-6 + }, + "perplexity/pplx-70b-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0.0, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-6 + }, + "perplexity/pplx-7b-chat": { + "input_cost_per_token": 7e-8, + "litellm_provider": "perplexity", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "perplexity/pplx-7b-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0.0, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "perplexity/sonar": { + "input_cost_per_token": 1e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + 
"output_cost_per_token": 1e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.012, + "search_context_size_low": 0.005, + "search_context_size_medium": 0.008 + }, + "supports_web_search": true + }, + "perplexity/sonar-deep-research": { + "citation_cost_per_token": 2e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-6, + "output_cost_per_token": 8e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.005, + "search_context_size_low": 0.005, + "search_context_size_medium": 0.005 + }, + "supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-medium-chat": { + "input_cost_per_token": 6e-7, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.8e-6 + }, + "perplexity/sonar-medium-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0, + "litellm_provider": "perplexity", + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "mode": "chat", + "output_cost_per_token": 1.8e-6 + }, + "perplexity/sonar-pro": { + "input_cost_per_token": 3e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 200000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01 + }, + "supports_web_search": true + }, + "perplexity/sonar-reasoning": { + "input_cost_per_token": 1e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.005, + 
"search_context_size_medium": 0.008 + }, + "supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-reasoning-pro": { + "input_cost_per_token": 2e-6, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01 + }, + "supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-small-chat": { + "input_cost_per_token": 7e-8, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "perplexity/sonar-small-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0, + "litellm_provider": "perplexity", + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "publicai/swiss-ai/apertus-8b-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/swiss-ai/apertus-70b-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + 
"output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/salamandra-7b-instruct-tools-16k": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/ALIA-40b-instruct_Q8_0": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Qwen-SEA-LION-v4-32B-IT": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Think": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + 
}, + "publicai/allenai/Olmo-3-32B-Think": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "qwen.qwen3-coder-480b-a35b-v1:0": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.8e-6, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "qwen.qwen3-235b-a22b-2507-v1:0": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8.8e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "qwen.qwen3-coder-30b-a3b-v1:0": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "qwen.qwen3-32b-v1:0": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "qwen.qwen3-next-80b-a3b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 1.2e-6, + "supports_function_calling": true, + "supports_system_messages": true + }, + "qwen.qwen3-vl-235b-a22b": { + "input_cost_per_token": 5.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.66e-6, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true + }, + "recraft/recraftv2": { + "litellm_provider": "recraft", + "mode": "image_generation", + "output_cost_per_image": 0.022, + "source": "https://www.recraft.ai/docs#pricing", + "supported_endpoints": ["/v1/images/generations"] + }, + "recraft/recraftv3": { + "litellm_provider": "recraft", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://www.recraft.ai/docs#pricing", + "supported_endpoints": ["/v1/images/generations"] + }, + "replicate/meta/llama-2-13b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-13b-chat": { + "input_cost_per_token": 1e-7, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-7, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-70b": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-70b-chat": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_tool_choice": true + 
}, + "replicate/meta/llama-2-7b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-7b-chat": { + "input_cost_per_token": 5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-70b": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "replicate", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-70b-instruct": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "replicate", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.75e-6, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-8b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "max_tokens": 8086, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-8b-instruct": { + "input_cost_per_token": 5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "max_tokens": 8086, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/mistralai/mistral-7b-instruct-v0.2": { + "input_cost_per_token": 5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/mistralai/mistral-7b-v0.1": { + "input_cost_per_token": 
5e-8, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_tool_choice": true + }, + "replicate/mistralai/mixtral-8x7b-instruct-v0.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-6, + "supports_tool_choice": true + }, + "replicate/openai/gpt-5": { + "input_cost_per_token": 1.25e-6, + "output_cost_per_token": 1e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-oss-20b": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 3.6e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4.5-haiku": { + "input_cost_per_token": 1e-6, + "output_cost_per_token": 5e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/ibm-granite/granite-3.3-8b-instruct": { + "input_cost_per_token": 3e-8, + "output_cost_per_token": 2.5e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o": { + "input_cost_per_token": 2.5e-6, + "output_cost_per_token": 1e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "replicate/openai/o4-mini": { + "input_cost_per_token": 1e-6, + "output_cost_per_token": 4e-6, + "output_cost_per_reasoning_token": 4e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1-mini": { + "input_cost_per_token": 1.1e-6, + "output_cost_per_token": 4.4e-6, + "output_cost_per_reasoning_token": 4.4e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1": { + "input_cost_per_token": 1.5e-5, + "output_cost_per_token": 6e-5, + "output_cost_per_reasoning_token": 6e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/qwen/qwen3-235b-a22b-instruct-2507": { + "input_cost_per_token": 2.64e-7, + "output_cost_per_token": 1.06e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4-sonnet": { + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + 
"supports_prompt_caching": true + }, + "replicate/deepseek-ai/deepseek-v3": { + "input_cost_per_token": 1.45e-6, + "output_cost_per_token": 1.45e-6, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-3.7-sonnet": { + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-haiku": { + "input_cost_per_token": 1e-6, + "output_cost_per_token": 5e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-sonnet": { + "input_cost_per_token": 3.75e-6, + "output_cost_per_token": 1.875e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/google/gemini-3-pro": { + "input_cost_per_token": 2e-6, + "output_cost_per_token": 1.2e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + 
"replicate/anthropic/claude-4.5-sonnet": { + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1.5e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/openai/gpt-4.1": { + "input_cost_per_token": 2e-6, + "output_cost_per_token": 8e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-4.1-nano": { + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4.1-mini": { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 1.6e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-5-nano": { + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-5-mini": { + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + 
"replicate/google/gemini-2.5-flash": { + "input_cost_per_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-oss-120b": { + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 7.2e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-v3.1": { + "input_cost_per_token": 6.72e-7, + "output_cost_per_token": 2.016e-6, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/xai/grok-4": { + "input_cost_per_token": 7.2e-6, + "output_cost_per_token": 3.6e-5, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-r1": { + "input_cost_per_token": 3.75e-6, + "output_cost_per_token": 1e-5, + "output_cost_per_reasoning_token": 1e-5, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_reasoning": true, + "supports_system_messages": true + }, + "rerank-english-v2.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-english-v3.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + 
"max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-multilingual-v2.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-multilingual-v3.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-v3.5": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/ranking/nvidia/llama-3.2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-13b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-13b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + 
"max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-70b-b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-7b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-7b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sambanova/DeepSeek-R1": { + "input_cost_per_token": 5e-6, + "litellm_provider": "sambanova", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7e-6, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 7e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.4e-6, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/DeepSeek-V3-0324": { + "input_cost_per_token": 3e-6, + "litellm_provider": "sambanova", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4.5e-6, + "source": 
"https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "sambanova/Llama-4-Maverick-17B-128E-Instruct": { + "input_cost_per_token": 6.3e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount" + }, + "mode": "chat", + "output_cost_per_token": 1.8e-6, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "sambanova/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 4e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount" + }, + "mode": "chat", + "output_cost_per_token": 7e-7, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 5e-6, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": 
true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.2-1B-Instruct": { + "input_cost_per_token": 4e-8, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 8e-8, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Meta-Llama-3.2-3B-Instruct": { + "input_cost_per_token": 8e-8, + "litellm_provider": "sambanova", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-7, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Meta-Llama-3.3-70B-Instruct": { + "input_cost_per_token": 6e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-Guard-3-8B": { + "input_cost_per_token": 3e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/QwQ-32B": { + "input_cost_per_token": 5e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-6, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Qwen2-Audio-7B-Instruct": { + "input_cost_per_token": 5e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0001, + "source": "https://cloud.sambanova.ai/plans/pricing", + 
"supports_audio_input": true + }, + "sambanova/Qwen3-32B": { + "input_cost_per_token": 4e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-7, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "sambanova/DeepSeek-V3.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 3e-6, + "output_cost_per_token": 4.5e-6, + "litellm_provider": "sambanova", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 3e-6, + "output_cost_per_token": 4.5e-6, + "litellm_provider": "sambanova", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "snowflake/claude-3-5-sonnet": { + "litellm_provider": "snowflake", + "max_input_tokens": 18000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_computer_use": true + }, + "snowflake/deepseek-r1": { + "litellm_provider": "snowflake", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_reasoning": true + }, + "snowflake/gemma-7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-1.5-large": { + "litellm_provider": "snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-1.5-mini": { + "litellm_provider": 
"snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-instruct": { + "litellm_provider": "snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama2-70b-chat": { + "litellm_provider": "snowflake", + "max_input_tokens": 4096, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3-8b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.1-405b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.1-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.1-8b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.2-1b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.2-3b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-large": { + 
"litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-large2": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mixtral-8x7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/reka-core": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/reka-flash": { + "litellm_provider": "snowflake", + "max_input_tokens": 100000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/snowflake-arctic": { + "litellm_provider": "snowflake", + "max_input_tokens": 4096, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/snowflake-llama-3.1-405b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/snowflake-llama-3.3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "stability/sd3": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3-large": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3-large-turbo": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3-medium": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 
0.035, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3.5-large": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3.5-large-turbo": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/sd3.5-medium": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.035, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/stable-image-ultra": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.08, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability/inpaint": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/outpaint": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.004, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/erase": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/search-and-replace": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/search-and-recolor": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/remove-background": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/replace-background-and-relight": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 
0.008, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/sketch": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/structure": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/style": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/style-transfer": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.008, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/fast": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.002, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/conservative": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.04, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/creative": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.06, + "supported_endpoints": ["/v1/images/edits"] + }, + "stability/stable-image-core": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": ["/v1/images/generations"] + }, + "stability.sd3-5-large-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "stability.sd3-large-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "stability.stable-image-core-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + 
"stability.stable-conservative-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.4 + }, + "stability.stable-creative-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.6 + }, + "stability.stable-fast-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.03 + }, + "stability.stable-outpaint-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.06 + }, + "stability.stable-image-control-sketch-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-control-structure-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-erase-object-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-inpaint-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-remove-background-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-search-recolor-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-search-replace-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-style-guide-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 
0.07 + }, + "stability.stable-style-transfer-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.08 + }, + "stability.stable-image-core-v1:1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + "stability.stable-image-ultra-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.14 + }, + "stability.stable-image-ultra-v1:1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.14 + }, + "standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 3.81469e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 4.359e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 4.359e-8, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "linkup/search": { + "input_cost_per_query": 0.00587, + "litellm_provider": "linkup", + "mode": "search" + }, + "linkup/search-deep": { + "input_cost_per_query": 0.05867, + "litellm_provider": "linkup", + "mode": "search" + }, + "tavily/search": { + "input_cost_per_query": 0.008, + "litellm_provider": "tavily", + "mode": "search" + }, + "tavily/search-advanced": { + "input_cost_per_query": 0.016, + "litellm_provider": "tavily", + "mode": "search" + }, + "text-bison": { + "input_cost_per_character": 2.5e-7, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_character": 5e-7, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "input_cost_per_character": 2.5e-7, + "input_cost_per_token": 1.25e-7, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "output_cost_per_token": 1.25e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@001": { + "input_cost_per_character": 2.5e-7, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "input_cost_per_character": 2.5e-7, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_character": 5e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-completion-codestral/codestral-2405": { + "input_cost_per_token": 0.0, + "litellm_provider": "text-completion-codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "completion", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + 
"text-completion-codestral/codestral-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "text-completion-codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "completion", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + "text-embedding-004": { + "deprecation_date": "2026-01-14", + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-005": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-3-large": { + "input_cost_per_token": 1.3e-7, + "input_cost_per_token_batches": 6.5e-8, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0, + "output_vector_size": 3072 + }, + "text-embedding-3-small": { + "input_cost_per_token": 2e-8, + "input_cost_per_token_batches": 1e-8, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0, + "output_vector_size": 1536 + }, + "text-embedding-ada-002": { + "input_cost_per_token": 1e-7, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + 
"text-embedding-ada-002-v2": { + "input_cost_per_token": 1e-7, + "input_cost_per_token_batches": 5e-8, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0 + }, + "text-embedding-large-exp-03-07": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-preview-0409": { + "input_cost_per_token": 6.25e-9, + "input_cost_per_token_batch_requests": 5e-9, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "text-moderation-007": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-moderation-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-moderation-stable": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-multilingual-embedding-002": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + 
"output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-multilingual-embedding-preview-0409": { + "input_cost_per_token": 6.25e-9, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-unicorn": { + "input_cost_per_token": 1e-5, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 2.8e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-unicorn@001": { + "input_cost_per_token": 1e-5, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 2.8e-5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual@001": { + 
"input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@001": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@003": { + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "together-ai-21.1b-41b": { + "input_cost_per_token": 8e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8e-7 + }, + "together-ai-4.1b-8b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 2e-7 + }, + "together-ai-41.1b-80b": { + "input_cost_per_token": 9e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 9e-7 + }, + "together-ai-8.1b-21b": { + "input_cost_per_token": 3e-7, + "litellm_provider": "together_ai", + "max_tokens": 1000, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "together-ai-81.1b-110b": { + "input_cost_per_token": 1.8e-6, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1.8e-6 + }, + "together-ai-embedding-151m-to-350m": { + "input_cost_per_token": 1.6e-8, 
+ "litellm_provider": "together_ai", + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "together-ai-embedding-up-to-150m": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "together_ai/baai/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, + "together_ai/BAAI/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, + "together-ai-up-to-4b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1e-7 + }, + "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput": { + "input_cost_per_token": 2e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://www.together.ai/models/qwen3-235b-a22b-instruct-2507-fp8", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "input_cost_per_token": 6.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "mode":
"chat", + "output_cost_per_token": 3e-6, + "source": "https://www.together.ai/models/qwen3-235b-a22b-thinking-2507", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-fp8-tput": { + "input_cost_per_token": 2e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://www.together.ai/models/qwen3-235b-a22b-fp8-tput", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_tool_choice": false + }, + "together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { + "input_cost_per_token": 2e-6, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-6, + "source": "https://www.together.ai/models/qwen3-coder-480b-a35b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-R1": { + "input_cost_per_token": 3e-6, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 7e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-R1-0528-tput": { + "input_cost_per_token": 5.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.19e-6, + "source": "https://www.together.ai/models/deepseek-r1-0528-throughput", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-V3": { + 
"input_cost_per_token": 1.25e-6, + "litellm_provider": "together_ai", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-V3.1": { + "input_cost_per_token": 6e-7, + "litellm_provider": "together_ai", + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.7e-6, + "source": "https://www.together.ai/models/deepseek-v3-1", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-3.2-3B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "input_cost_per_token": 8.8e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8.8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": { + "input_cost_per_token": 0, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "input_cost_per_token": 2.7e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8.5e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 5.9e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 3.5e-6, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 3.5e-6, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 8.8e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8.8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1.8e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mistral-7B-Instruct-v0.1": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mistral-Small-24B-Instruct-2501": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { + 
"input_cost_per_token": 6e-7, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2-Instruct": { + "input_cost_per_token": 1e-6, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://www.together.ai/models/kimi-k2-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/openai/gpt-oss-120b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://www.together.ai/models/gpt-oss-120b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/openai/gpt-oss-20b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://www.together.ai/models/gpt-oss-20b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/togethercomputer/CodeLlama-34b-Instruct": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/zai-org/GLM-4.5-Air-FP8": { + "input_cost_per_token": 2e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-6, + "source": "https://www.together.ai/models/glm-4-5-air", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/zai-org/GLM-4.6": { + "input_cost_per_token": 6e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "source": "https://www.together.ai/models/glm-4-6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 1e-6, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-6, + "source": "https://www.together.ai/models/kimi-k2-0905", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "tts-1": { + "input_cost_per_character": 1.5e-5, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "tts-1-hd": { + 
"input_cost_per_character": 3e-5, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "aws_polly/standard": { + "input_cost_per_character": 4e-6, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/neural": { + "input_cost_per_character": 1.6e-5, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/long-form": { + "input_cost_per_character": 0.0001, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/generative": { + "input_cost_per_character": 3e-5, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "us.amazon.nova-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-7, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "us.amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.5e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-7, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "us.amazon.nova-premier-v1:0": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 10000, + 
"max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.25e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": false, + "supports_response_schema": true, + "supports_vision": true + }, + "us.amazon.nova-pro-v1:0": { + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 8e-8, + "input_cost_per_token": 8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "us.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-6, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-6, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + 
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-opus-4-1-20250805-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-6, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 3.3e-6, + "input_cost_per_token_above_200k_tokens": 6.6e-6, + 
"output_cost_per_token_above_200k_tokens": 2.475e-5, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "au.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-6, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-6, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "us.anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + 
"search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.875e-6, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "global.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.deepseek.r1-v1:0": { + 
"input_cost_per_token": 1.35e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.4e-6, + "supports_function_calling": false, + "supports_reasoning": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-405b-instruct-v1:0": { + "input_cost_per_token": 5.32e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-5, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-70b-instruct-v1:0": { + "input_cost_per_token": 9.9e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 9.9e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-8b-instruct-v1:0": { + "input_cost_per_token": 2.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.2e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-11b-instruct-v1:0": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.5e-7, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "us.meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.5e-7, + 
"litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-90b-instruct-v1:0": { + "input_cost_per_token": 2e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "us.meta.llama3-3-70b-instruct-v1:0": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama4-maverick-17b-instruct-v1:0": { + "input_cost_per_token": 2.4e-7, + "input_cost_per_token_batches": 1.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9.7e-7, + "output_cost_per_token_batches": 4.85e-7, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama4-scout-17b-instruct-v1:0": { + "input_cost_per_token": 1.7e-7, + "input_cost_per_token_batches": 8.5e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "output_cost_per_token_batches": 3.3e-7, + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.mistral.pixtral-large-2502-v1:0": { + "input_cost_per_token": 
2e-6, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "v0/v0-1.0-md": { + "input_cost_per_token": 3e-6, + "litellm_provider": "v0", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "v0/v0-1.5-lg": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "v0", + "max_input_tokens": 512000, + "max_output_tokens": 512000, + "max_tokens": 512000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "v0/v0-1.5-md": { + "input_cost_per_token": 3e-6, + "litellm_provider": "v0", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/alibaba/qwen-3-14b": { + "input_cost_per_token": 8e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.4e-7 + }, + "vercel_ai_gateway/alibaba/qwen-3-235b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + 
"vercel_ai_gateway/alibaba/qwen-3-30b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "vercel_ai_gateway/alibaba/qwen-3-32b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "vercel_ai_gateway/alibaba/qwen3-coder": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 262144, + "max_output_tokens": 66536, + "max_tokens": 66536, + "mode": "chat", + "output_cost_per_token": 1.6e-6 + }, + "vercel_ai_gateway/amazon/nova-lite": { + "input_cost_per_token": 6e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 300000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-7 + }, + "vercel_ai_gateway/amazon/nova-micro": { + "input_cost_per_token": 3.5e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.4e-7 + }, + "vercel_ai_gateway/amazon/nova-pro": { + "input_cost_per_token": 8e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 300000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.2e-6 + }, + "vercel_ai_gateway/amazon/titan-embed-text-v2": { + "input_cost_per_token": 2e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/anthropic/claude-3-haiku": { + "cache_creation_input_token_cost": 3e-7, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vercel_ai_gateway", + 
"max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6 + }, + "vercel_ai_gateway/anthropic/claude-3-opus": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5 + }, + "vercel_ai_gateway/anthropic/claude-3.5-haiku": { + "cache_creation_input_token_cost": 1e-6, + "cache_read_input_token_cost": 8e-8, + "input_cost_per_token": 8e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-6 + }, + "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/anthropic/claude-4-opus": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5 + }, + "vercel_ai_gateway/anthropic/claude-4-sonnet": { + "cache_creation_input_token_cost": 3.75e-6, + 
"cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/cohere/command-a": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/cohere/command-r": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "vercel_ai_gateway/cohere/command-r-plus": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/cohere/embed-v4.0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-6 + }, + "vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { + "input_cost_per_token": 7.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 9.9e-7 + }, + "vercel_ai_gateway/deepseek/deepseek-v3": { + "input_cost_per_token": 9e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + 
"max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-7 + }, + "vercel_ai_gateway/google/gemini-2.0-flash": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "vercel_ai_gateway/google/gemini-2.0-flash-lite": { + "input_cost_per_token": 7.5e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "vercel_ai_gateway/google/gemini-2.5-flash": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2.5e-6 + }, + "vercel_ai_gateway/google/gemini-2.5-pro": { + "input_cost_per_token": 2.5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/google/gemini-embedding-001": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/google/gemma-2-9b": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-7 + }, + "vercel_ai_gateway/google/text-embedding-005": { + "input_cost_per_token": 2.5e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/google/text-multilingual-embedding-002": { + "input_cost_per_token": 2.5e-8, 
+ "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/inception/mercury-coder-small": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "vercel_ai_gateway/meta/llama-3-70b": { + "input_cost_per_token": 5.9e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.9e-7 + }, + "vercel_ai_gateway/meta/llama-3-8b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-8 + }, + "vercel_ai_gateway/meta/llama-3.1-70b": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-7 + }, + "vercel_ai_gateway/meta/llama-3.1-8b": { + "input_cost_per_token": 5e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131000, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-8 + }, + "vercel_ai_gateway/meta/llama-3.2-11b": { + "input_cost_per_token": 1.6e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.6e-7 + }, + "vercel_ai_gateway/meta/llama-3.2-1b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-7 + }, + 
"vercel_ai_gateway/meta/llama-3.2-3b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-7 + }, + "vercel_ai_gateway/meta/llama-3.2-90b": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-7 + }, + "vercel_ai_gateway/meta/llama-3.3-70b": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-7 + }, + "vercel_ai_gateway/meta/llama-4-maverick": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "vercel_ai_gateway/meta/llama-4-scout": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "vercel_ai_gateway/mistral/codestral": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 9e-7 + }, + "vercel_ai_gateway/mistral/codestral-embed": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/mistral/devstral-small": { + "input_cost_per_token": 7e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, 
+ "mode": "chat", + "output_cost_per_token": 2.8e-7 + }, + "vercel_ai_gateway/mistral/magistral-medium": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-6 + }, + "vercel_ai_gateway/mistral/magistral-small": { + "input_cost_per_token": 5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-6 + }, + "vercel_ai_gateway/mistral/ministral-3b": { + "input_cost_per_token": 4e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-8 + }, + "vercel_ai_gateway/mistral/ministral-8b": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1e-7 + }, + "vercel_ai_gateway/mistral/mistral-embed": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/mistral/mistral-large": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 6e-6 + }, + "vercel_ai_gateway/mistral/mistral-saba-24b": { + "input_cost_per_token": 7.9e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7.9e-7 + }, + "vercel_ai_gateway/mistral/mistral-small": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + 
"max_input_tokens": 32000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 3e-7 + }, + "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { + "input_cost_per_token": 1.2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 65536, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.2e-6 + }, + "vercel_ai_gateway/mistral/pixtral-12b": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.5e-7 + }, + "vercel_ai_gateway/mistral/pixtral-large": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 6e-6 + }, + "vercel_ai_gateway/moonshotai/kimi-k2": { + "input_cost_per_token": 5.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.2e-6 + }, + "vercel_ai_gateway/morph/morph-v3-fast": { + "input_cost_per_token": 8e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-6 + }, + "vercel_ai_gateway/morph/morph-v3-large": { + "input_cost_per_token": 9e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.9e-6 + }, + "vercel_ai_gateway/openai/gpt-3.5-turbo": { + "input_cost_per_token": 5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-6 + }, + 
"vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { + "input_cost_per_token": 1.5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-6 + }, + "vercel_ai_gateway/openai/gpt-4-turbo": { + "input_cost_per_token": 1e-5, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-5 + }, + "vercel_ai_gateway/openai/gpt-4.1": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-6 + }, + "vercel_ai_gateway/openai/gpt-4.1-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 4e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-6 + }, + "vercel_ai_gateway/openai/gpt-4.1-nano": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-7 + }, + "vercel_ai_gateway/openai/gpt-4o": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 1.25e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/openai/gpt-4o-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 
7.5e-8, + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7 + }, + "vercel_ai_gateway/openai/o1": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 7.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-5 + }, + "vercel_ai_gateway/openai/o3": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-6 + }, + "vercel_ai_gateway/openai/o3-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6 + }, + "vercel_ai_gateway/openai/o4-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.75e-7, + "input_cost_per_token": 1.1e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-6 + }, + "vercel_ai_gateway/openai/text-embedding-3-large": { + "input_cost_per_token": 1.3e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/openai/text-embedding-3-small": { + "input_cost_per_token": 2e-8, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + 
"max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/openai/text-embedding-ada-002": { + "input_cost_per_token": 1e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/perplexity/sonar": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-6 + }, + "vercel_ai_gateway/perplexity/sonar-pro": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/perplexity/sonar-reasoning": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 5e-6 + }, + "vercel_ai_gateway/perplexity/sonar-reasoning-pro": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 8e-6 + }, + "vercel_ai_gateway/vercel/v0-1.0-md": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/vercel/v0-1.5-md": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/xai/grok-2": { + "input_cost_per_token": 2e-6, + 
"litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/xai/grok-2-vision": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-5 + }, + "vercel_ai_gateway/xai/grok-3": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/xai/grok-3-fast": { + "input_cost_per_token": 5e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-5 + }, + "vercel_ai_gateway/xai/grok-3-mini": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7 + }, + "vercel_ai_gateway/xai/grok-3-mini-fast": { + "input_cost_per_token": 6e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-6 + }, + "vercel_ai_gateway/xai/grok-4": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5 + }, + "vercel_ai_gateway/zai/glm-4.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.2e-6 + }, + 
"vercel_ai_gateway/zai/glm-4.5-air": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": "chat", + "output_cost_per_token": 1.1e-6 + }, + "vercel_ai_gateway/zai/glm-4.6": { + "litellm_provider": "vercel_ai_gateway", + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 4.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.8e-6, + "source": "https://vercel.com/ai-gateway/models/glm-4.6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/chirp": { + "input_cost_per_character": 3e-5, + "litellm_provider": "vertex_ai", + "mode": "audio_speech", + "source": "https://cloud.google.com/text-to-speech/pricing", + "supported_endpoints": ["/v1/audio/speech"] + }, + "vertex_ai/claude-3-5-haiku": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-3-5-haiku@20241022": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-3-5-sonnet": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-5-sonnet-v2": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-5-sonnet-v2@20241022": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-5-sonnet@20240620": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-7-sonnet@20250219": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "deprecation_date": "2025-06-01", + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-3-haiku": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-haiku@20240307": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-6, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-opus": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": 
"chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-opus@20240229": { + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-sonnet": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-sonnet@20240229": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "input_cost_per_token_batches": 7.5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "output_cost_per_token_batches": 3.75e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4-1@20250805": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "input_cost_per_token_batches": 7.5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "output_cost_per_token_batches": 3.75e-5, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": 
true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-5@20251101": { + "cache_creation_input_token_cost": 6.25e-6, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token_batches": 1.5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_batches": 7.5e-6, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"vertex_ai/claude-sonnet-4-5@20250929": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token_batches": 1.5e-6, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_batches": 7.5e-6, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4@20250514": { + "cache_creation_input_token_cost": 1.875e-5, + "cache_read_input_token_cost": 1.5e-6, + "input_cost_per_token": 1.5e-5, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-sonnet-4": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, 
+ "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-sonnet-4@20250514": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/mistralai/codestral-2@001": { + "input_cost_per_token": 3e-7, + 
"litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2@001": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/codestral-2": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2501": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral@2405": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral@latest": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-mistral_models", + 
"max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-v3.1-maas": { + "input_cost_per_token": 1.35e-6, + "litellm_provider": "vertex_ai-deepseek_models", + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 5.4e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": ["us-west2"], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-v3.2-maas": { + "input_cost_per_token": 5.6e-7, + "input_cost_per_token_batches": 2.8e-7, + "litellm_provider": "vertex_ai-deepseek_models", + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "output_cost_per_token_batches": 8.4e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": ["us-west2"], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-r1-0528-maas": { + "input_cost_per_token": 1.35e-6, + "litellm_provider": "vertex_ai-deepseek_models", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + 
"vertex_ai/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-5, + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 100000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000 + }, + "vertex_ai/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" + }, + "vertex_ai/imagegeneration@006": { + 
"litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-fast-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-generate-002": { + "deprecation_date": "2025-11-10", + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-capability-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-insert-objects" + }, + "vertex_ai/imagen-4.0-fast-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-4.0-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-4.0-ultra-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/jamba-1.5": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + 
"max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-large": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-large@001": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-6, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-mini": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-mini@001": { + "input_cost_per_token": 2e-7, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-3.1-405b-instruct-maas": { + "input_cost_per_token": 5e-6, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.6e-5, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.1-70b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + 
"output_cost_per_token": 0.0, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.1-8b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "metadata": { + "notes": "VertexAI states that The Llama 3.1 API service for llama-3.1-70b-instruct-maas and llama-3.1-8b-instruct-maas are in public preview and at no cost." + }, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "metadata": { + "notes": "VertexAI states that The Llama 3.2 API service is at no cost during public preview, and will be priced as per dollar-per-1M-tokens at GA." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.15e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-4-maverick-17b-16e-instruct-maas": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.15e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 10000000, + "max_output_tokens": 10000000, + "max_tokens": 10000000, + "mode": "chat", + "output_cost_per_token": 7e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas": { + "input_cost_per_token": 2.5e-7, + 
"litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 10000000, + "max_output_tokens": 10000000, + "max_tokens": 10000000, + "mode": "chat", + "output_cost_per_token": 7e-7, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-405b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-70b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-8b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/minimaxai/minimax-m2-maas": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-minimax_models", + "max_input_tokens": 196608, + "max_output_tokens": 196608, + "max_tokens": 196608, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_tool_choice": 
true + }, + "vertex_ai/moonshotai/kimi-k2-thinking-maas": { + "input_cost_per_token": 6e-7, + "litellm_provider": "vertex_ai-moonshot_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "vertex_ai/zai-org/glm-4.7-maas": { + "input_cost_per_token": 6e-7, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3@001": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3@001": { + "input_cost_per_token": 4e-7, + "litellm_provider": 
"vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large-2411": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large@2407": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large@2411-001": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large@latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-nemo@2407": { + "input_cost_per_token": 3e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-nemo@latest": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 
128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-small-2503": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/mistral-small-2503@001": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-6, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-ocr-2505": { + "litellm_provider": "vertex_ai", + "mode": "ocr", + "ocr_cost_per_page": 0.0005, + "supported_endpoints": ["/v1/ocr"], + "source": "https://cloud.google.com/generative-ai-app-builder/pricing" + }, + "vertex_ai/deepseek-ai/deepseek-ocr-maas": { + "litellm_provider": "vertex_ai", + "mode": "ocr", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "ocr_cost_per_page": 0.0003, + "source": "https://cloud.google.com/vertex-ai/pricing" + }, + "vertex_ai/openai/gpt-oss-120b-maas": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-openai_models", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas", + "supports_reasoning": true + }, + "vertex_ai/openai/gpt-oss-20b-maas": { + "input_cost_per_token": 7.5e-8, + "litellm_provider": "vertex_ai-openai_models", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + 
"output_cost_per_token": 3e-7, + "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas", + "supports_reasoning": true + }, + "vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": { + "input_cost_per_token": 1e-6, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-next-80b-a3b-instruct-maas": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-next-80b-a3b-thinking-maas": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.2e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/veo-2.0-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": 
"video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-fast-generate-preview": { + "deprecation_date": "2025-11-12", + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-generate-preview": { + "deprecation_date": "2025-11-12", + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-fast-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.1-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + 
"supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.1-fast-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.1-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.1-fast-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "voyage/rerank-2": { + "input_cost_per_token": 5e-8, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_query_tokens": 16000, + "max_tokens": 16000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/rerank-2-lite": { + "input_cost_per_token": 2e-8, + "litellm_provider": "voyage", + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "max_query_tokens": 8000, + "max_tokens": 8000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/rerank-2.5": { + "input_cost_per_token": 5e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/rerank-2.5-lite": { + "input_cost_per_token": 
2e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-2": { + "input_cost_per_token": 1e-7, + "litellm_provider": "voyage", + "max_input_tokens": 4000, + "max_tokens": 4000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3": { + "input_cost_per_token": 6e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3-large": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3-lite": { + "input_cost_per_token": 2e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3.5": { + "input_cost_per_token": 6e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3.5-lite": { + "input_cost_per_token": 2e-8, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-code-2": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-code-3": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-context-3": { + "input_cost_per_token": 1.8e-7, + "litellm_provider": "voyage", + "max_input_tokens": 120000, + "max_tokens": 120000, + "mode": "embedding", + 
"output_cost_per_token": 0.0 + }, + "voyage/voyage-finance-2": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-large-2": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-law-2": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-lite-01": { + "input_cost_per_token": 1e-7, + "litellm_provider": "voyage", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-lite-02-instruct": { + "input_cost_per_token": 1e-7, + "litellm_provider": "voyage", + "max_input_tokens": 4000, + "max_tokens": 4000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-multimodal-3": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "wandb/openai/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/openai/gpt-oss-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/zai-org/GLM-4.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 2e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + 
"wandb/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1e-6, + "output_cost_per_token": 1.5e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/moonshotai/Kimi-K2-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.5e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/meta-llama/Llama-3.1-8B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 2.2e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-V3.1": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 1.65e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 161000, + "max_input_tokens": 161000, + "max_output_tokens": 161000, + "input_cost_per_token": 1.35e-6, + "output_cost_per_token": 5.4e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 161000, + "max_input_tokens": 161000, + "max_output_tokens": 161000, + "input_cost_per_token": 1.14e-6, + "output_cost_per_token": 2.75e-6, + "litellm_provider": "wandb", + "mode": "chat" + }, + 
"wandb/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7.1e-7, + "output_cost_per_token": 7.1e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "max_tokens": 64000, + "max_input_tokens": 64000, + "max_output_tokens": 64000, + "input_cost_per_token": 1.7e-7, + "output_cost_per_token": 6.6e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/microsoft/Phi-4-mini-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 3.5e-7, + "litellm_provider": "wandb", + "mode": "chat" + }, + "watsonx/ibm/granite-3-8b-instruct": { + "input_cost_per_token": 2e-7, + "litellm_provider": "watsonx", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 2e-7, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-large": { + "input_cost_per_token": 3e-6, + "litellm_provider": "watsonx", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "watsonx/bigscience/mt0-xxl-13b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + 
"input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/core42/jais-13b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/google/flan-t5-xl-3b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-chat-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-instruct-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-3-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + 
"watsonx/ibm/granite-4-h-small": { + "max_tokens": 20480, + "max_input_tokens": 20480, + "max_output_tokens": 20480, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.5e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1024-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1536-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-512-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, + "litellm_provider": "watsonx", + "mode": 
"chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-vision-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-11b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-1b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-6, + "output_cost_per_token": 2e-6, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + 
"watsonx/meta-llama/llama-3-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7.1e-7, + "output_cost_per_token": 7.1e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-4-maverick-17b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 1.4e-6, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-guard-3-11b-vision": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/mistralai/mistral-medium-2505": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3e-6, + "output_cost_per_token": 1e-5, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-3-1-24b-instruct-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 1e-7, 
+ "output_cost_per_token": 3e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/openai/gpt-oss-120b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/sdaia/allam-1-13b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1.8e-6, + "output_cost_per_token": 1.8e-6, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/whisper-large-v3-turbo": { + "input_cost_per_second": 0.0001, + "output_cost_per_second": 0.0001, + "litellm_provider": "watsonx", + "mode": "audio_transcription", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "whisper-1": { + "input_cost_per_second": 0.0001, + "litellm_provider": "openai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0001, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "xai/grok-2": { + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, 
+ "supports_web_search": true + }, + "xai/grok-2-1212": { + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-2-latest": { + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-2-vision": { + "input_cost_per_image": 2e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-2-vision-1212": { + "input_cost_per_image": 2e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-2-vision-latest": { + "input_cost_per_image": 2e-6, + "input_cost_per_token": 2e-6, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-3": { + "input_cost_per_token": 3e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 
131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-beta": { + "input_cost_per_token": 3e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-fast-beta": { + "input_cost_per_token": 5e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-fast-latest": { + "input_cost_per_token": 5e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-5, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-latest": { + "input_cost_per_token": 3e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini": { + "input_cost_per_token": 3e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + 
"max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-beta": { + "input_cost_per_token": 3e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-fast": { + "input_cost_per_token": 6e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-6, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-fast-beta": { + "input_cost_per_token": 6e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-6, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-fast-latest": { + "input_cost_per_token": 6e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-6, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": 
false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-latest": { + "input_cost_per_token": 3e-7, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4": { + "input_cost_per_token": 3e-6, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-fast-reasoning": { + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "cache_read_input_token_cost": 5e-8, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-fast-non-reasoning": { + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "cache_read_input_token_cost": 5e-8, + "max_tokens": 2000000.0, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-0709": { + "input_cost_per_token": 3e-6, + 
"input_cost_per_token_above_128k_tokens": 6e-6, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_above_128k_tokens": 3e-5, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-latest": { + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_128k_tokens": 6e-6, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_above_128k_tokens": 3e-5, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-reasoning": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": 
"https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-reasoning-latest": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-non-reasoning": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-non-reasoning-latest": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-7, + 
"output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-beta": { + "input_cost_per_token": 5e-6, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-code-fast": { + "cache_read_input_token_cost": 2e-8, + "input_cost_per_token": 2e-7, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-code-fast-1": { + "cache_read_input_token_cost": 2e-8, + "input_cost_per_token": 2e-7, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-code-fast-1-0825": { + "cache_read_input_token_cost": 2e-8, + "input_cost_per_token": 2e-7, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-6, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-vision-beta": { + "input_cost_per_image": 5e-6, + 
"input_cost_per_token": 5e-6, + "litellm_provider": "xai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "zai/glm-4.7": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.2e-6, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.6": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.2e-6, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.2e-6, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5v": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 1.8e-6, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-x": { + "input_cost_per_token": 2.2e-6, + "output_cost_per_token": 8.9e-6, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + 
"supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-air": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 1.1e-6, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-airx": { + "input_cost_per_token": 1.1e-6, + "output_cost_per_token": 4.5e-6, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4-32b-0414-128k": { + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-flash": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "vertex_ai/search_api": { + "input_cost_per_query": 0.0015, + "litellm_provider": "vertex_ai", + "mode": "vector_store" + }, + "openai/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "openai", + "mode": "chat" + }, + "openai/sora-2": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + 
"openai/sora-2-pro": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2-pro": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2-pro-high-res": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] + }, + "runwayml/gen4_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_aleph": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.15, + "source": 
"https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], + "metadata": { + "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" + } + }, + "runwayml/gen3a_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_image": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.05, + "output_cost_per_image": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], + "metadata": { + "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. Using 5 credits ($0.05) as base cost" + } + }, + "runwayml/gen4_image_turbo": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.02, + "output_cost_per_image": 0.02, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], + "metadata": { + "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" + } + }, + "runwayml/eleven_multilingual_v2": { + "litellm_provider": "runwayml", + "mode": "audio_speech", + "input_cost_per_character": 3e-7, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "metadata": { + "comment": "Estimated cost based on standard TTS pricing. 
RunwayML uses ElevenLabs models." + } + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 4.5e-7, + "output_cost_per_token": 1.8e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/flux-kontext-pro": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/SSD-1B": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/chronos-hermes-13b-v2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": 
"chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-34b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-34b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-34b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b-python": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b-instruct": { + "max_tokens": 
16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-qwen-1p5-7b": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/codegemma-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/codegemma-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-671b-v2-p1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-3b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-70b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-qwen-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-qwen-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/flux-kontext-max": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/dbrx-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-1b-base": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-33b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + 
"output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-base": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-base-v1p5": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-instruct-v1p5": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-lite-base": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-lite-instruct": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-prover-v2": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-0528-distill-qwen3-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 
2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-70b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-7b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v2-lite-chat": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + 
"litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v2p5": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/devstral-small-2505": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dobby-mini-unhinged-plus-llama-3-1-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dobby-unhinged-llama-3-3-70b-new": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dolphin-2-9-2-qwen2-72b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dolphin-2p6-mixtral-8x7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ernie-4p5-21b-a3b-pt": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + 
"mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ernie-4p5-300b-a47b-pt": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/fare-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firefunction-v1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firellava-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firesearch-ocr-v6": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/fireworks-asr-large": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/fireworks-asr-v2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev": { + "max_tokens": 
4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev-controlnet-union": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-9, + "output_cost_per_token": 1e-9, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev-fp8": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 5e-10, + "output_cost_per_token": 5e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-schnell": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-schnell-fp8": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 3.5e-10, + "output_cost_per_token": 3.5e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/gemma-2b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-3-27b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + 
"output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-7b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma2-9b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5v": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-safeguard-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-safeguard-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/hermes-2-pro-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-38b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + 
"mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-78b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-8b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/japanese-stable-diffusion-xl": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/kat-coder": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/kat-dev-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/kat-dev-72b-exp": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-2-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-3-1b": { + 
"max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-3-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-13b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-70b-chat": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-7b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + 
"output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct-hf": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-8b-instruct-hf": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct-long": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct-1b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": 
"chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-nemotron-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llamaguard-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llava-yi-34b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/minimax-m1-80k": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/minimax-m2": { + "max_tokens": 4096, + 
"max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-14b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-3b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-8b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-4k": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-v0p2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-v3": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 
32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-v0p2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-large-3-fp8": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-nemo-base-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-nemo-instruct-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-small-24b-instruct-2501": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 1.2e-6, + "output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 1.2e-6, + 
"output_cost_per_token": 1.2e-6, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b-instruct-hf": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mythomax-l2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nemotron-nano-v2-12b-vl": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-capybara-7b-v1p9": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-2-mixtral-8x7b-dpo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": 
"chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-2-yi-34b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nvidia-nemotron-nano-12b-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nvidia-nemotron-nano-9b-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openchat-3p5-0106-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/openhermes-2-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openhermes-2p5-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openorca-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-2-3b": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-3-mini-128k-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-3-vision-128k-instruct": { + "max_tokens": 32064, + "max_input_tokens": 32064, + "max_output_tokens": 32064, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-python-v1": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-v1": { + 
"max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-v2": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/playground-v2-1024px-aesthetic": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/playground-v2-5-1024px-aesthetic": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/pythia-12b": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-qwq-32b-preview": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-v2p5-14b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-v2p5-7b": { + "max_tokens": 131072, + "max_input_tokens": 
131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen1p5-72b-chat": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-2b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-72b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-0p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-7, + 
"output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-1p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-32b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-72b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-72b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-0p5b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/qwen2p5-coder-0p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-14b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-14b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-1p5b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-1p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-64k": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-3b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-3b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-math-72b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/qwen2p5-vl-32b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-72b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-7b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-0p6b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-14b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft": { + "max_tokens": 
262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft-131072": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft-40960": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b-instruct-2507": { + "max_tokens": 262144, + 
"max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b-thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/qwen3-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-4b-instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-30b-a3b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-instruct-bf16": { + "max_tokens": 4096, + 
"max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-embedding-0p6b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-embedding-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-next-80b-a3b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-next-80b-a3b-thinking": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-0p6b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + 
"output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-235b-a22b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-235b-a22b-thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-30b-a3b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-30b-a3b-thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-32b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-8b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + 
"litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwq-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/rolm-ocr": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/snorkel-mistral-7b-pairrm-dpo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/stable-diffusion-xl-1024-v1-0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/stablecode-3b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder-16b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/starcoder2-15b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder2-3b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder2-7b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/toppy-m-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/whisper-v3": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/whisper-v3-turbo": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b-200k-capybara": { + "max_tokens": 200000, + "max_input_tokens": 200000, + 
"max_output_tokens": 200000, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-6b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/zephyr-7b-beta": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "novita/deepseek/deepseek-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.69e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.345e-7, + "input_cost_per_token_cache_hit": 1.345e-7, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token_cache_hit": 3e-8 + }, + 
"novita/zai-org/glm-4.7": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.2e-6, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/xiaomimimo/mimo-v2-flash": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 262144, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 2e-8, + "input_cost_per_token_cache_hit": 2e-8, + "supports_reasoning": true + }, + "novita/zai-org/autoglm-phone-9b-multilingual": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.38e-7, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/moonshotai/kimi-k2-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.5e-6, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + 
"output_cost_per_token": 1.2e-6, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token_cache_hit": 3e-8, + "supports_reasoning": true + }, + "novita/paddlepaddle/paddleocr-vl": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-v3.2-exp": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 4.1e-7, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9.8e-7, + "output_cost_per_token": 3.95e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 5.5e-8, + 
"input_cost_per_token_cache_hit": 5.5e-8, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 2.2e-6, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/kwaipilot/kat-coder-pro": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.2e-6, + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_token_cache_hit": 6e-8 + }, + "novita/qwen/qwen3-next-80b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-next-80b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": 
true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-ocr": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1-terminus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.5e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-max": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.11e-6, + "output_cost_per_token": 8.45e-6, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/skywork/r1v4-lite": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + 
"max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-0905": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.5e-6, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-coder-480b-a35b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 1.3e-6, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-coder-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.7e-7, + "max_input_tokens": 160000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + 
"supports_system_messages": true, + "supports_response_schema": true + }, + "novita/openai/gpt-oss-120b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.7e-7, + "output_cost_per_token": 2.3e-6, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3-0324": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 1.12e-6, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7 + }, + "novita/zai-org/glm-4.5": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 2.2e-6, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + 
"supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-thinking-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.1-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_system_messages": true + }, + "novita/google/gemma-3-12b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 1.8e-6, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/openai/gpt-oss-20b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": 
true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-instruct-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 5.8e-7, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-14b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.35e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 120000, + "max_tokens": 120000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen-2.5-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/mistralai/mistral-nemo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.7e-7, + "max_input_tokens": 60288, + "max_output_tokens": 16000, + "max_tokens": 16000, + 
"supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/minimaxai/minimax-m1-80k": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 2.2e-6, + "max_input_tokens": 1000000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-0528": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 2.5e-6, + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3.5e-7, + "input_cost_per_token_cache_hit": 3.5e-7, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 64000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/microsoft/wizardlm-2-8x22b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6.2e-7, + "output_cost_per_token": 6.2e-7, + "max_input_tokens": 65535, + "max_output_tokens": 8000, + "max_tokens": 8000, + 
"supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-0528-qwen3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-llama-70b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.1e-7, + "output_cost_per_token": 7.4e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-235b-a22b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-4-maverick-17b-128e-instruct-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 8.5e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/meta-llama/llama-4-scout-17b-16e-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + 
"supports_vision": true, + "supports_system_messages": true + }, + "novita/nousresearch/hermes-2-pro-llama-3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen2.5-vl-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/sao10k/l3-70b-euryale-v2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.48e-6, + "output_cost_per_token": 1.48e-6, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-21B-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/sao10k/l3-8b-lunaris": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/baichuan/baichuan-m2-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"max_tokens": 131072, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-424b-a47b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4.2e-7, + "output_cost_per_token": 1.25e-6, + "max_input_tokens": 123000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/baidu/ernie-4.5-300b-a47b-paddle": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.8e-7, + "output_cost_per_token": 1.1e-6, + "max_input_tokens": 123000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-prover-v2-671b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 2.5e-6, + "max_input_tokens": 160000, + "max_output_tokens": 160000, + "max_tokens": 160000, + "supports_system_messages": true + }, + "novita/qwen/qwen3-32b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-30b-a3b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/google/gemma-3-27b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.19e-7, + "output_cost_per_token": 2e-7, + "max_input_tokens": 98304, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + 
"novita/deepseek/deepseek-v3-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 1.3e-6, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 2.5e-6, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/Sao10K/L3-8B-Stheno-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/gryphe/mythomax-l2-13b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 4096, + "max_output_tokens": 3200, + "max_tokens": 3200, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.9e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + 
"novita/qwen/qwen3-vl-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-8, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5-air": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 8.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 7e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-vl-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 1e-6, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-omni-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 9.7e-7, + "max_input_tokens": 65536, + 
"max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_audio_input": true + }, + "novita/qwen/qwen3-omni-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 9.7e-7, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "novita/qwen/qwen-mt-plus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 16384, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 5.6e-7, + "max_input_tokens": 30000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/baidu/ernie-4.5-21B-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 120000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + 
"novita/qwen/qwen3-8b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.38e-7, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-4b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen2.5-7b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/meta-llama/llama-3.2-3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 32768, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/sao10k/l31-70b-euryale-v2.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.48e-6, + "output_cost_per_token": 1.48e-6, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen3-embedding-0.6b": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 0, + 
"max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "novita/qwen/qwen3-embedding-8b": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 0, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "novita/baai/bge-m3": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 1e-8, + "output_cost_per_token": 1e-8, + "max_input_tokens": 8192, + "max_output_tokens": 96000, + "max_tokens": 96000 + }, + "novita/qwen/qwen3-reranker-8b": { + "litellm_provider": "novita", + "mode": "rerank", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "novita/baai/bge-reranker-v2-m3": { + "litellm_provider": "novita", + "mode": "rerank", + "input_cost_per_token": 1e-8, + "output_cost_per_token": 1e-8, + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "max_tokens": 8000 + }, + "llamagate/llama-3.1-8b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/llama-3.2-3b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/mistral-7b-v0.3": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-8b": { + "max_tokens": 8192, 
+ "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.4e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/dolphin3-8b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-r1-8b": { + "max_tokens": 16384, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/deepseek-r1-7b-qwen": { + "max_tokens": 16384, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/openthinker-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/qwen2.5-coder-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-coder-6.7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + 
"max_output_tokens": 4096, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/codellama-7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-vl-8b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 5.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/llava-7b": { + "max_tokens": 2048, + "max_input_tokens": 4096, + "max_output_tokens": 2048, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/gemma3-4b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/nomic-embed-text": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 0, + "litellm_provider": "llamagate", + "mode": "embedding" + }, + "llamagate/qwen3-embedding-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 0, + "litellm_provider": "llamagate", + "mode": "embedding" + } +} diff --git a/letta/orm/__init__.py b/letta/orm/__init__.py index 310e8d2f..72e1112a 100644 
--- a/letta/orm/__init__.py +++ b/letta/orm/__init__.py @@ -31,6 +31,7 @@ from letta.orm.prompt import Prompt from letta.orm.provider import Provider from letta.orm.provider_model import ProviderModel from letta.orm.provider_trace import ProviderTrace +from letta.orm.provider_trace_metadata import ProviderTraceMetadata from letta.orm.run import Run from letta.orm.run_metrics import RunMetrics from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable diff --git a/letta/orm/archive.py b/letta/orm/archive.py index 16e0fddf..932e37af 100644 --- a/letta/orm/archive.py +++ b/letta/orm/archive.py @@ -46,8 +46,8 @@ class Archive(SqlalchemyBase, OrganizationMixin): default=VectorDBProvider.NATIVE, doc="The vector database provider used for this archive's passages", ) - embedding_config: Mapped[dict] = mapped_column( - EmbeddingConfigColumn, nullable=False, doc="Embedding configuration for passages in this archive" + embedding_config: Mapped[Optional[dict]] = mapped_column( + EmbeddingConfigColumn, nullable=True, doc="Embedding configuration for passages in this archive" ) metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="Additional metadata for the archive") _vector_db_namespace: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="Private field for vector database namespace") diff --git a/letta/orm/passage.py b/letta/orm/passage.py index 404830f2..4bbe9509 100644 --- a/letta/orm/passage.py +++ b/letta/orm/passage.py @@ -25,18 +25,18 @@ class BasePassage(SqlalchemyBase, OrganizationMixin): id: Mapped[str] = mapped_column(primary_key=True, doc="Unique passage identifier") text: Mapped[str] = mapped_column(doc="Passage text content") - embedding_config: Mapped[dict] = mapped_column(EmbeddingConfigColumn, doc="Embedding configuration") + embedding_config: Mapped[Optional[dict]] = mapped_column(EmbeddingConfigColumn, nullable=True, doc="Embedding configuration") metadata_: Mapped[dict] = 
mapped_column(JSON, doc="Additional metadata") # dual storage: json column for fast retrieval, junction table for efficient queries tags: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True, doc="Tags associated with this passage") - # Vector embedding field based on database type + # Vector embedding field based on database type - nullable for text-only search if settings.database_engine is DatabaseChoice.POSTGRES: from pgvector.sqlalchemy import Vector - embedding = mapped_column(Vector(MAX_EMBEDDING_DIM)) + embedding = mapped_column(Vector(MAX_EMBEDDING_DIM), nullable=True) else: - embedding = Column(CommonVector) + embedding = Column(CommonVector, nullable=True) @declared_attr def organization(cls) -> Mapped["Organization"]: diff --git a/letta/orm/provider.py b/letta/orm/provider.py index bd42a1be..f784caa1 100644 --- a/letta/orm/provider.py +++ b/letta/orm/provider.py @@ -1,6 +1,7 @@ +from datetime import datetime from typing import TYPE_CHECKING, Optional -from sqlalchemy import ForeignKey, String, Text, UniqueConstraint +from sqlalchemy import DateTime, ForeignKey, String, Text, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column, relationship from letta.orm.mixins import OrganizationMixin @@ -41,6 +42,11 @@ class Provider(SqlalchemyBase, OrganizationMixin): api_key_enc: Mapped[Optional[str]] = mapped_column(Text, nullable=True, doc="Encrypted API key or secret key for the provider.") access_key_enc: Mapped[Optional[str]] = mapped_column(Text, nullable=True, doc="Encrypted access key for the provider.") + # sync tracking + last_synced: Mapped[Optional[datetime]] = mapped_column( + DateTime(timezone=True), nullable=True, doc="Last time models were synced for this provider." 
+ ) + # relationships organization: Mapped["Organization"] = relationship("Organization", back_populates="providers") models: Mapped[list["ProviderModel"]] = relationship("ProviderModel", back_populates="provider", cascade="all, delete-orphan") diff --git a/letta/orm/provider_trace.py b/letta/orm/provider_trace.py index b0cbb181..90399b5d 100644 --- a/letta/orm/provider_trace.py +++ b/letta/orm/provider_trace.py @@ -32,5 +32,15 @@ class ProviderTrace(SqlalchemyBase, OrganizationMixin): String, nullable=True, doc="Source service that generated this trace (memgpt-server, lettuce-py)" ) + # v2 protocol fields + org_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the organization") + user_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the user who initiated the request") + compaction_settings: Mapped[Optional[dict]] = mapped_column( + JSON, nullable=True, doc="Compaction/summarization settings (summarization calls only)" + ) + llm_config: Mapped[Optional[dict]] = mapped_column( + JSON, nullable=True, doc="LLM configuration used for this call (non-summarization calls only)" + ) + # Relationships organization: Mapped["Organization"] = relationship("Organization", lazy="selectin") diff --git a/letta/orm/provider_trace_metadata.py b/letta/orm/provider_trace_metadata.py new file mode 100644 index 00000000..5d8fecf7 --- /dev/null +++ b/letta/orm/provider_trace_metadata.py @@ -0,0 +1,45 @@ +import uuid +from datetime import datetime +from typing import Optional + +from sqlalchemy import JSON, DateTime, Index, String, UniqueConstraint, func +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from letta.orm.mixins import OrganizationMixin +from letta.orm.sqlalchemy_base import SqlalchemyBase +from letta.schemas.provider_trace import ProviderTraceMetadata as PydanticProviderTraceMetadata + + +class ProviderTraceMetadata(SqlalchemyBase, OrganizationMixin): + """Metadata-only provider trace storage (no 
request/response JSON).""" + + __tablename__ = "provider_trace_metadata" + __pydantic_model__ = PydanticProviderTraceMetadata + __table_args__ = ( + Index("ix_provider_trace_metadata_step_id", "step_id"), + UniqueConstraint("id", name="uq_provider_trace_metadata_id"), + ) + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), primary_key=True, server_default=func.now(), doc="Timestamp when the trace was created" + ) + id: Mapped[str] = mapped_column( + String, primary_key=True, doc="Unique provider trace identifier", default=lambda: f"provider_trace-{uuid.uuid4()}" + ) + step_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the step that this trace is associated with") + + # Telemetry context fields + agent_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the agent that generated this trace") + agent_tags: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, doc="Tags associated with the agent for filtering") + call_type: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="Type of call (agent_step, summarization, etc.)") + run_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the run this trace is associated with") + source: Mapped[Optional[str]] = mapped_column( + String, nullable=True, doc="Source service that generated this trace (memgpt-server, lettuce-py)" + ) + + # v2 protocol fields + org_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the organization") + user_id: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="ID of the user who initiated the request") + + # Relationships + organization: Mapped["Organization"] = relationship("Organization", lazy="selectin") diff --git a/letta/orm/sqlalchemy_base.py b/letta/orm/sqlalchemy_base.py index 1276684c..c012c54a 100644 --- a/letta/orm/sqlalchemy_base.py +++ b/letta/orm/sqlalchemy_base.py @@ -42,7 +42,9 @@ def handle_db_timeout(func): 
logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}") raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e) except QueryCanceledError as e: - logger.error(f"Query canceled (statement timeout) while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}") + logger.error( + f"Query canceled (statement timeout) while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}" + ) raise DatabaseTimeoutError(message=f"Query canceled due to statement timeout in {func.__name__}.", original_exception=e) return wrapper @@ -56,7 +58,9 @@ def handle_db_timeout(func): logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}") raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e) except QueryCanceledError as e: - logger.error(f"Query canceled (statement timeout) while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}") + logger.error( + f"Query canceled (statement timeout) while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}" + ) raise DatabaseTimeoutError(message=f"Query canceled due to statement timeout in {func.__name__}.", original_exception=e) return async_wrapper @@ -207,6 +211,10 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base): """ Constructs the query for listing records. """ + # Security check: if the model has organization_id column, actor should be provided + if actor is None and hasattr(cls, "organization_id"): + logger.warning(f"SECURITY: Listing org-scoped model {cls.__name__} without actor. 
This bypasses organization filtering.") + query = select(cls) if join_model and join_conditions: @@ -446,6 +454,14 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base): ): logger.debug(f"Reading {cls.__name__} with ID(s): {identifiers} with actor={actor}") + # Security check: if the model has organization_id column, actor should be provided + # to ensure proper org-scoping. Log a warning if actor is None. + if actor is None and hasattr(cls, "organization_id"): + logger.warning( + f"SECURITY: Reading org-scoped model {cls.__name__} without actor. " + f"IDs: {identifiers}. This bypasses organization filtering." + ) + # Start the query query = select(cls) # Collect query conditions for better error reporting @@ -681,6 +697,12 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base): **kwargs, ): logger.debug(f"Calculating size for {cls.__name__} with filters {kwargs}") + + # Security check: if the model has organization_id column, actor should be provided + if actor is None and hasattr(cls, "organization_id"): + logger.warning( + f"SECURITY: Calculating size for org-scoped model {cls.__name__} without actor. This bypasses organization filtering." 
+ ) query = select(func.count(1)).select_from(cls) if actor: diff --git a/letta/schemas/archive.py b/letta/schemas/archive.py index 08c863d9..f0d0348f 100644 --- a/letta/schemas/archive.py +++ b/letta/schemas/archive.py @@ -17,7 +17,7 @@ class ArchiveBase(OrmMetadataBase): vector_db_provider: VectorDBProvider = Field( default=VectorDBProvider.NATIVE, description="The vector database provider used for this archive's passages" ) - embedding_config: EmbeddingConfig = Field(..., description="Embedding configuration for passages in this archive") + embedding_config: Optional[EmbeddingConfig] = Field(None, description="Embedding configuration for passages in this archive") metadata: Optional[Dict] = Field(default_factory=dict, validation_alias="metadata_", description="Additional metadata") diff --git a/letta/schemas/enums.py b/letta/schemas/enums.py index 41cc4d9c..96efb446 100644 --- a/letta/schemas/enums.py +++ b/letta/schemas/enums.py @@ -63,11 +63,14 @@ class ProviderType(str, Enum): hugging_face = "hugging-face" letta = "letta" lmstudio_openai = "lmstudio_openai" + minimax = "minimax" mistral = "mistral" ollama = "ollama" openai = "openai" together = "together" vllm = "vllm" + sglang = "sglang" + openrouter = "openrouter" xai = "xai" zai = "zai" diff --git a/letta/schemas/group.py b/letta/schemas/group.py index 0006c17e..ef7760a8 100644 --- a/letta/schemas/group.py +++ b/letta/schemas/group.py @@ -5,6 +5,7 @@ from pydantic import BaseModel, Field from letta.schemas.enums import PrimitiveType from letta.schemas.letta_base import LettaBase +from letta.validators import AgentId, BlockId class ManagerType(str, Enum): @@ -93,43 +94,43 @@ class RoundRobinManagerUpdate(ManagerConfig): class SupervisorManager(ManagerConfig): manager_type: Literal[ManagerType.supervisor] = Field(ManagerType.supervisor, description="") - manager_agent_id: str = Field(..., description="") + manager_agent_id: AgentId = Field(..., description="") class SupervisorManagerUpdate(ManagerConfig): 
manager_type: Literal[ManagerType.supervisor] = Field(ManagerType.supervisor, description="") - manager_agent_id: Optional[str] = Field(..., description="") + manager_agent_id: Optional[AgentId] = Field(..., description="") class DynamicManager(ManagerConfig): manager_type: Literal[ManagerType.dynamic] = Field(ManagerType.dynamic, description="") - manager_agent_id: str = Field(..., description="") + manager_agent_id: AgentId = Field(..., description="") termination_token: Optional[str] = Field("DONE!", description="") max_turns: Optional[int] = Field(None, description="") class DynamicManagerUpdate(ManagerConfig): manager_type: Literal[ManagerType.dynamic] = Field(ManagerType.dynamic, description="") - manager_agent_id: Optional[str] = Field(None, description="") + manager_agent_id: Optional[AgentId] = Field(None, description="") termination_token: Optional[str] = Field(None, description="") max_turns: Optional[int] = Field(None, description="") class SleeptimeManager(ManagerConfig): manager_type: Literal[ManagerType.sleeptime] = Field(ManagerType.sleeptime, description="") - manager_agent_id: str = Field(..., description="") + manager_agent_id: AgentId = Field(..., description="") sleeptime_agent_frequency: Optional[int] = Field(None, description="") class SleeptimeManagerUpdate(ManagerConfig): manager_type: Literal[ManagerType.sleeptime] = Field(ManagerType.sleeptime, description="") - manager_agent_id: Optional[str] = Field(None, description="") + manager_agent_id: Optional[AgentId] = Field(None, description="") sleeptime_agent_frequency: Optional[int] = Field(None, description="") class VoiceSleeptimeManager(ManagerConfig): manager_type: Literal[ManagerType.voice_sleeptime] = Field(ManagerType.voice_sleeptime, description="") - manager_agent_id: str = Field(..., description="") + manager_agent_id: AgentId = Field(..., description="") max_message_buffer_length: Optional[int] = Field( None, description="The desired maximum length of messages in the context 
window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving.", @@ -142,7 +143,7 @@ class VoiceSleeptimeManager(ManagerConfig): class VoiceSleeptimeManagerUpdate(ManagerConfig): manager_type: Literal[ManagerType.voice_sleeptime] = Field(ManagerType.voice_sleeptime, description="") - manager_agent_id: Optional[str] = Field(None, description="") + manager_agent_id: Optional[AgentId] = Field(None, description="") max_message_buffer_length: Optional[int] = Field( None, description="The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving.", @@ -170,11 +171,11 @@ ManagerConfigUpdateUnion = Annotated[ class GroupCreate(BaseModel): - agent_ids: List[str] = Field(..., description="") + agent_ids: List[AgentId] = Field(..., description="") description: str = Field(..., description="") manager_config: ManagerConfigUnion = Field(RoundRobinManager(), description="") project_id: Optional[str] = Field(None, description="The associated project id.") - shared_block_ids: List[str] = Field([], description="", deprecated=True) + shared_block_ids: List[BlockId] = Field([], description="", deprecated=True) hidden: Optional[bool] = Field( None, description="If set to True, the group will be hidden.", @@ -190,8 +191,8 @@ class InternalTemplateGroupCreate(GroupCreate): class GroupUpdate(BaseModel): - agent_ids: Optional[List[str]] = Field(None, description="") + agent_ids: Optional[List[AgentId]] = Field(None, description="") description: Optional[str] = Field(None, description="") manager_config: Optional[ManagerConfigUpdateUnion] = Field(None, description="") project_id: Optional[str] = Field(None, description="The associated project id.") - shared_block_ids: Optional[List[str]] = Field(None, description="", deprecated=True) + shared_block_ids: Optional[List[BlockId]] = Field(None, description="", deprecated=True) diff --git 
a/letta/schemas/identity.py b/letta/schemas/identity.py index bb4f429d..c5602be4 100644 --- a/letta/schemas/identity.py +++ b/letta/schemas/identity.py @@ -5,6 +5,7 @@ from pydantic import Field from letta.schemas.enums import PrimitiveType from letta.schemas.letta_base import LettaBase +from letta.validators import AgentId, BlockId class IdentityType(str, Enum): @@ -57,8 +58,8 @@ class IdentityCreate(LettaBase): name: str = Field(..., description="The name of the identity.") identity_type: IdentityType = Field(..., description="The type of the identity.") project_id: Optional[str] = Field(None, description="The project id of the identity, if applicable.") - agent_ids: Optional[List[str]] = Field(None, description="The agent ids that are associated with the identity.", deprecated=True) - block_ids: Optional[List[str]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) + agent_ids: Optional[List[AgentId]] = Field(None, description="The agent ids that are associated with the identity.", deprecated=True) + block_ids: Optional[List[BlockId]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) properties: Optional[List[IdentityProperty]] = Field(None, description="List of properties associated with the identity.") @@ -67,8 +68,8 @@ class IdentityUpsert(LettaBase): name: str = Field(..., description="The name of the identity.") identity_type: IdentityType = Field(..., description="The type of the identity.") project_id: Optional[str] = Field(None, description="The project id of the identity, if applicable.") - agent_ids: Optional[List[str]] = Field(None, description="The agent ids that are associated with the identity.", deprecated=True) - block_ids: Optional[List[str]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) + agent_ids: Optional[List[AgentId]] = Field(None, description="The agent ids that are associated with the 
identity.", deprecated=True) + block_ids: Optional[List[BlockId]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) properties: Optional[List[IdentityProperty]] = Field(None, description="List of properties associated with the identity.") @@ -76,8 +77,8 @@ class IdentityUpdate(LettaBase): identifier_key: Optional[str] = Field(None, description="External, user-generated identifier key of the identity.") name: Optional[str] = Field(None, description="The name of the identity.") identity_type: Optional[IdentityType] = Field(None, description="The type of the identity.") - agent_ids: Optional[List[str]] = Field(None, description="The agent ids that are associated with the identity.", deprecated=True) - block_ids: Optional[List[str]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) + agent_ids: Optional[List[AgentId]] = Field(None, description="The agent ids that are associated with the identity.", deprecated=True) + block_ids: Optional[List[BlockId]] = Field(None, description="The IDs of the blocks associated with the identity.", deprecated=True) properties: Optional[List[IdentityProperty]] = Field(None, description="List of properties associated with the identity.") diff --git a/letta/schemas/letta_message.py b/letta/schemas/letta_message.py index a403460c..b90628dd 100644 --- a/letta/schemas/letta_message.py +++ b/letta/schemas/letta_message.py @@ -7,8 +7,10 @@ from pydantic import BaseModel, Field, field_serializer, field_validator from letta.schemas.letta_message_content import ( LettaAssistantMessageContentUnion, + LettaToolReturnContentUnion, LettaUserMessageContentUnion, get_letta_assistant_message_content_union_str_json_schema, + get_letta_tool_return_content_union_str_json_schema, get_letta_user_message_content_union_str_json_schema, ) @@ -35,7 +37,11 @@ class ApprovalReturn(MessageReturn): class ToolReturn(MessageReturn): type: Literal[MessageReturnType.tool] = 
Field(default=MessageReturnType.tool, description="The message type to be created.") - tool_return: str + tool_return: Union[str, List[LettaToolReturnContentUnion]] = Field( + ..., + description="The tool return value - either a string or list of content parts (text/image)", + json_schema_extra=get_letta_tool_return_content_union_str_json_schema(), + ) status: Literal["success", "error"] tool_call_id: str stdout: Optional[List[str]] = None @@ -563,6 +569,10 @@ class SystemMessageListResult(UpdateSystemMessage): default=None, description="The unique identifier of the agent that owns the message.", ) + conversation_id: str | None = Field( + default=None, + description="The unique identifier of the conversation that the message belongs to.", + ) created_at: datetime = Field(..., description="The time the message was created in ISO format.") @@ -581,6 +591,10 @@ class UserMessageListResult(UpdateUserMessage): default=None, description="The unique identifier of the agent that owns the message.", ) + conversation_id: str | None = Field( + default=None, + description="The unique identifier of the conversation that the message belongs to.", + ) created_at: datetime = Field(..., description="The time the message was created in ISO format.") @@ -599,6 +613,10 @@ class ReasoningMessageListResult(UpdateReasoningMessage): default=None, description="The unique identifier of the agent that owns the message.", ) + conversation_id: str | None = Field( + default=None, + description="The unique identifier of the conversation that the message belongs to.", + ) created_at: datetime = Field(..., description="The time the message was created in ISO format.") @@ -617,6 +635,10 @@ class AssistantMessageListResult(UpdateAssistantMessage): default=None, description="The unique identifier of the agent that owns the message.", ) + conversation_id: str | None = Field( + default=None, + description="The unique identifier of the conversation that the message belongs to.", + ) created_at: datetime 
= Field(..., description="The time the message was created in ISO format.") diff --git a/letta/schemas/letta_message_content.py b/letta/schemas/letta_message_content.py index 24265777..7c62ebd3 100644 --- a/letta/schemas/letta_message_content.py +++ b/letta/schemas/letta_message_content.py @@ -138,6 +138,48 @@ def get_letta_user_message_content_union_str_json_schema(): } +# ------------------------------- +# Tool Return Content Types +# ------------------------------- + + +LettaToolReturnContentUnion = Annotated[ + Union[TextContent, ImageContent], + Field(discriminator="type"), +] + + +def create_letta_tool_return_content_union_schema(): + return { + "oneOf": [ + {"$ref": "#/components/schemas/TextContent"}, + {"$ref": "#/components/schemas/ImageContent"}, + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextContent", + "image": "#/components/schemas/ImageContent", + }, + }, + } + + +def get_letta_tool_return_content_union_str_json_schema(): + """Schema that accepts either string or list of content parts for tool returns.""" + return { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/components/schemas/LettaToolReturnContentUnion", + }, + }, + {"type": "string"}, + ], + } + + # ------------------------------- # Assistant Content Types # ------------------------------- diff --git a/letta/schemas/letta_request.py b/letta/schemas/letta_request.py index 69c245b1..9290cc38 100644 --- a/letta/schemas/letta_request.py +++ b/letta/schemas/letta_request.py @@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MES from letta.schemas.letta_message import MessageType from letta.schemas.letta_message_content import LettaMessageContentUnion from letta.schemas.message import MessageCreate, MessageCreateUnion, MessageRole +from letta.validators import AgentId class ClientToolSchema(BaseModel): @@ -125,12 +126,33 @@ class LettaStreamingRequest(LettaRequest): ) +class 
ConversationMessageRequest(LettaRequest): + """Request for sending messages to a conversation. Streams by default.""" + + streaming: bool = Field( + default=True, + description="If True (default), returns a streaming response (Server-Sent Events). If False, returns a complete JSON response.", + ) + stream_tokens: bool = Field( + default=False, + description="Flag to determine if individual tokens should be streamed, rather than streaming per step (only used when streaming=true).", + ) + include_pings: bool = Field( + default=True, + description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts (only used when streaming=true).", + ) + background: bool = Field( + default=False, + description="Whether to process the request in the background (only used when streaming=true).", + ) + + class LettaAsyncRequest(LettaRequest): callback_url: Optional[str] = Field(None, description="Optional callback URL to POST to when the job completes") class LettaBatchRequest(LettaRequest): - agent_id: str = Field(..., description="The ID of the agent to send this batch request for") + agent_id: AgentId = Field(..., description="The ID of the agent to send this batch request for") class CreateBatch(BaseModel): diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 5ce041f8..4a62b2c1 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -43,12 +43,14 @@ class LLMConfig(BaseModel): "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", # completions endpoint "bedrock", "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth", ] = Field(..., description="The endpoint type for the model.") model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.") @@ -320,9 +322,10 @@ class LLMConfig(BaseModel): GoogleAIModelSettings, GoogleVertexModelSettings, GroqModelSettings, - Model, + ModelSettings, OpenAIModelSettings, OpenAIReasoning, + OpenRouterModelSettings, 
TogetherModelSettings, XAIModelSettings, ZAIModelSettings, @@ -395,15 +398,30 @@ class LLMConfig(BaseModel): max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) + elif self.model_endpoint_type == "openrouter": + return OpenRouterModelSettings( + max_output_tokens=self.max_tokens or 4096, + temperature=self.temperature, + ) elif self.model_endpoint_type == "chatgpt_oauth": return ChatGPTOAuthModelSettings( max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, reasoning=ChatGPTOAuthReasoning(reasoning_effort=self.reasoning_effort or "medium"), ) + elif self.model_endpoint_type == "minimax": + # MiniMax uses Anthropic-compatible API + thinking_type = "enabled" if self.enable_reasoner else "disabled" + return AnthropicModelSettings( + max_output_tokens=self.max_tokens or 4096, + temperature=self.temperature, + thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024), + verbosity=self.verbosity, + strict=self.strict, + ) else: - # If we don't know the model type, use the default Model schema - return Model(max_output_tokens=self.max_tokens or 4096) + # If we don't know the model type, use the base ModelSettings schema + return ModelSettings(max_output_tokens=self.max_tokens or 4096) @classmethod def is_openai_reasoning_model(cls, config: "LLMConfig") -> bool: diff --git a/letta/schemas/message.py b/letta/schemas/message.py index 0af6d2f0..22ed104a 100644 --- a/letta/schemas/message.py +++ b/letta/schemas/message.py @@ -50,6 +50,7 @@ from letta.schemas.letta_message_content import ( ImageContent, ImageSourceType, LettaMessageContentUnion, + LettaToolReturnContentUnion, OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, @@ -71,6 +72,34 @@ def truncate_tool_return(content: Optional[str], limit: Optional[int]) -> Option return content[:limit] + f"... 
[truncated {len(content) - limit} chars]" +def _get_text_from_part(part: Union[TextContent, ImageContent, dict]) -> Optional[str]: + """Extract text from a content part, returning None for images.""" + if isinstance(part, TextContent): + return part.text + elif isinstance(part, dict) and part.get("type") == "text": + return part.get("text", "") + return None + + +def tool_return_to_text(func_response: Optional[Union[str, List]]) -> Optional[str]: + """Convert tool return content to text, replacing images with placeholders.""" + if func_response is None: + return None + if isinstance(func_response, str): + return func_response + + text_parts = [text for part in func_response if (text := _get_text_from_part(part))] + image_count = sum( + 1 for part in func_response if isinstance(part, ImageContent) or (isinstance(part, dict) and part.get("type") == "image") + ) + + result = "\n".join(text_parts) + if image_count > 0: + placeholder = "[Image omitted]" if image_count == 1 else f"[{image_count} images omitted]" + result = (result + " " + placeholder) if result else placeholder + return result if result else None + + def add_inner_thoughts_to_tool_call( tool_call: OpenAIToolCall, inner_thoughts: str, @@ -366,6 +395,7 @@ class Message(BaseMessage): message_type=lm.message_type, content=lm.content, agent_id=message.agent_id, + conversation_id=message.conversation_id, created_at=message.created_at, ) ) @@ -376,6 +406,7 @@ class Message(BaseMessage): message_type=lm.message_type, content=lm.content, agent_id=message.agent_id, + conversation_id=message.conversation_id, created_at=message.created_at, ) ) @@ -386,6 +417,7 @@ class Message(BaseMessage): message_type=lm.message_type, reasoning=lm.reasoning, agent_id=message.agent_id, + conversation_id=message.conversation_id, created_at=message.created_at, ) ) @@ -396,6 +428,7 @@ class Message(BaseMessage): message_type=lm.message_type, content=lm.content, agent_id=message.agent_id, + conversation_id=message.conversation_id, 
created_at=message.created_at, ) ) @@ -786,8 +819,14 @@ class Message(BaseMessage): for tool_return in self.tool_returns: parsed_data = self._parse_tool_response(tool_return.func_response) + # Preserve multi-modal content (ToolReturn supports Union[str, List]) + if isinstance(tool_return.func_response, list): + tool_return_value = tool_return.func_response + else: + tool_return_value = parsed_data["message"] + tool_return_obj = LettaToolReturn( - tool_return=parsed_data["message"], + tool_return=tool_return_value, status=parsed_data["status"], tool_call_id=tool_return.tool_call_id, stdout=tool_return.stdout, @@ -801,11 +840,18 @@ class Message(BaseMessage): first_tool_return = all_tool_returns[0] + # Convert deprecated string-only field to text (preserve images in tool_returns list) + deprecated_tool_return_text = ( + tool_return_to_text(first_tool_return.tool_return) + if isinstance(first_tool_return.tool_return, list) + else first_tool_return.tool_return + ) + return ToolReturnMessage( id=self.id, date=self.created_at, # deprecated top-level fields populated from first tool return - tool_return=first_tool_return.tool_return, + tool_return=deprecated_tool_return_text, status=first_tool_return.status, tool_call_id=first_tool_return.tool_call_id, stdout=first_tool_return.stdout, @@ -840,11 +886,11 @@ class Message(BaseMessage): """Check if message has exactly one text content item.""" return self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent) - def _parse_tool_response(self, response_text: str) -> dict: + def _parse_tool_response(self, response_text: Union[str, List]) -> dict: """Parse tool response JSON and extract message and status. 
Args: - response_text: Raw JSON response text + response_text: Raw JSON response text OR list of content parts (for multi-modal) Returns: Dictionary with 'message' and 'status' keys @@ -852,6 +898,14 @@ class Message(BaseMessage): Raises: ValueError: If JSON parsing fails """ + # Handle multi-modal content (list with text/images) + if isinstance(response_text, list): + text_representation = tool_return_to_text(response_text) or "[Multi-modal content]" + return { + "message": text_representation, + "status": "success", + } + try: function_return = parse_json(response_text) return { @@ -1301,7 +1355,9 @@ class Message(BaseMessage): tool_return = self.tool_returns[0] if not tool_return.tool_call_id: raise TypeError("OpenAI API requires tool_call_id to be set.") - func_response = truncate_tool_return(tool_return.func_response, tool_return_truncation_chars) + # Convert to text first (replaces images with placeholders), then truncate + func_response_text = tool_return_to_text(tool_return.func_response) + func_response = truncate_tool_return(func_response_text, tool_return_truncation_chars) openai_message = { "content": func_response, "role": self.role, @@ -1356,8 +1412,9 @@ class Message(BaseMessage): for tr in m.tool_returns: if not tr.tool_call_id: raise TypeError("ToolReturn came back without a tool_call_id.") - # Ensure explicit tool_returns are truncated for Chat Completions - func_response = truncate_tool_return(tr.func_response, tool_return_truncation_chars) + # Convert multi-modal to text (images → placeholders), then truncate + func_response_text = tool_return_to_text(tr.func_response) + func_response = truncate_tool_return(func_response_text, tool_return_truncation_chars) result.append( { "content": func_response, @@ -1418,7 +1475,10 @@ class Message(BaseMessage): message_dicts.append(user_dict) elif self.role == "assistant" or self.role == "approval": - assert self.tool_calls is not None or (self.content is not None and len(self.content) > 0) + # Validate that 
message has content OpenAI Responses API can process + if self.tool_calls is None and (self.content is None or len(self.content) == 0): + # Skip this message (similar to Anthropic handling at line 1308) + return message_dicts # A few things may be in here, firstly reasoning content, secondly assistant messages, thirdly tool calls # TODO check if OpenAI Responses is capable of R->A->T like Anthropic? @@ -1456,17 +1516,17 @@ class Message(BaseMessage): ) elif self.role == "tool": - # Handle tool returns - similar pattern to Anthropic + # Handle tool returns - supports images via content arrays if self.tool_returns: for tool_return in self.tool_returns: if not tool_return.tool_call_id: raise TypeError("OpenAI Responses API requires tool_call_id to be set.") - func_response = truncate_tool_return(tool_return.func_response, tool_return_truncation_chars) + output = self._tool_return_to_responses_output(tool_return.func_response, tool_return_truncation_chars) message_dicts.append( { "type": "function_call_output", "call_id": tool_return.tool_call_id[:max_tool_id_length] if max_tool_id_length else tool_return.tool_call_id, - "output": func_response, + "output": output, } ) else: @@ -1534,6 +1594,50 @@ class Message(BaseMessage): return None + @staticmethod + def _image_dict_to_data_url(part: dict) -> Optional[str]: + """Convert image dict to data URL.""" + source = part.get("source", {}) + if source.get("type") == "base64" and source.get("data"): + media_type = source.get("media_type", "image/png") + return f"data:{media_type};base64,{source['data']}" + elif source.get("type") == "url": + return source.get("url") + return None + + @staticmethod + def _tool_return_to_responses_output( + func_response: Optional[Union[str, List]], + tool_return_truncation_chars: Optional[int] = None, + ) -> Union[str, List[dict]]: + """Convert tool return to OpenAI Responses API format.""" + if func_response is None: + return "" + if isinstance(func_response, str): + return 
truncate_tool_return(func_response, tool_return_truncation_chars) or "" + + output_parts: List[dict] = [] + for part in func_response: + if isinstance(part, TextContent): + text = truncate_tool_return(part.text, tool_return_truncation_chars) or "" + output_parts.append({"type": "input_text", "text": text}) + elif isinstance(part, ImageContent): + image_url = Message._image_source_to_data_url(part) + if image_url: + detail = getattr(part.source, "detail", None) or "auto" + output_parts.append({"type": "input_image", "image_url": image_url, "detail": detail}) + elif isinstance(part, dict): + if part.get("type") == "text": + text = truncate_tool_return(part.get("text", ""), tool_return_truncation_chars) or "" + output_parts.append({"type": "input_text", "text": text}) + elif part.get("type") == "image": + image_url = Message._image_dict_to_data_url(part) + if image_url: + detail = part.get("source", {}).get("detail", "auto") + output_parts.append({"type": "input_image", "image_url": image_url, "detail": detail}) + + return output_parts if output_parts else "" + @staticmethod def to_openai_responses_dicts_from_list( messages: List[Message], @@ -1550,6 +1654,68 @@ class Message(BaseMessage): ) return result + @staticmethod + def _get_base64_image_data(part: Union[ImageContent, dict]) -> Optional[tuple[str, str]]: + """Extract base64 data and media type from ImageContent or dict.""" + if isinstance(part, ImageContent): + source = part.source + if source.type == ImageSourceType.base64: + return source.data, source.media_type + elif source.type == ImageSourceType.letta and getattr(source, "data", None): + return source.data, getattr(source, "media_type", None) or "image/png" + elif isinstance(part, dict) and part.get("type") == "image": + source = part.get("source", {}) + if source.get("type") == "base64" and source.get("data"): + return source["data"], source.get("media_type", "image/png") + return None + + @staticmethod + def _tool_return_to_google_parts( + 
func_response: Optional[Union[str, List]], + tool_return_truncation_chars: Optional[int] = None, + ) -> tuple[str, List[dict]]: + """Extract text and image parts for Google API format.""" + if isinstance(func_response, str): + return truncate_tool_return(func_response, tool_return_truncation_chars) or "", [] + + text_parts = [] + image_parts = [] + for part in func_response: + if text := _get_text_from_part(part): + text_parts.append(text) + elif image_data := Message._get_base64_image_data(part): + data, media_type = image_data + image_parts.append({"inlineData": {"data": data, "mimeType": media_type}}) + + text = truncate_tool_return("\n".join(text_parts), tool_return_truncation_chars) or "" + if image_parts: + suffix = f"[{len(image_parts)} image(s) attached]" + text = f"{text}\n{suffix}" if text else suffix + + return text, image_parts + + @staticmethod + def _tool_return_to_anthropic_content( + func_response: Optional[Union[str, List]], + tool_return_truncation_chars: Optional[int] = None, + ) -> Union[str, List[dict]]: + """Convert tool return to Anthropic tool_result content format.""" + if func_response is None: + return "" + if isinstance(func_response, str): + return truncate_tool_return(func_response, tool_return_truncation_chars) or "" + + content: List[dict] = [] + for part in func_response: + if text := _get_text_from_part(part): + text = truncate_tool_return(text, tool_return_truncation_chars) or "" + content.append({"type": "text", "text": text}) + elif image_data := Message._get_base64_image_data(part): + data, media_type = image_data + content.append({"type": "image", "source": {"type": "base64", "data": data, "media_type": media_type}}) + + return content if content else "" + def to_anthropic_dict( self, current_model: str, @@ -1628,8 +1794,11 @@ class Message(BaseMessage): } elif self.role == "assistant" or self.role == "approval": - # assert self.tool_calls is not None or text_content is not None, vars(self) - assert self.tool_calls is not None 
or len(self.content) > 0 + # Validate that message has content Anthropic API can process + if self.tool_calls is None and (self.content is None or len(self.content) == 0): + # Skip this message (consistent with OpenAI dict handling) + return None + anthropic_message = { "role": "assistant", } @@ -1759,12 +1928,13 @@ class Message(BaseMessage): f"Message ID: {self.id}, Tool: {self.name or 'unknown'}, " f"Tool return index: {idx}/{len(self.tool_returns)}" ) - func_response = truncate_tool_return(tool_return.func_response, tool_return_truncation_chars) + # Convert to Anthropic format (supports images) + tool_result_content = self._tool_return_to_anthropic_content(tool_return.func_response, tool_return_truncation_chars) content.append( { "type": "tool_result", "tool_use_id": resolved_tool_call_id, - "content": func_response, + "content": tool_result_content, } ) if content: @@ -1884,7 +2054,16 @@ class Message(BaseMessage): } elif self.role == "assistant" or self.role == "approval": - assert self.tool_calls is not None or text_content is not None or len(self.content) > 1 + # Validate that message has content Google API can process + if self.tool_calls is None and text_content is None and len(self.content) <= 1: + # Message has no tool calls, no extractable text, and not multi-part + logger.warning( + f"Assistant/approval message {self.id} has no content Google API can convert: " + f"tool_calls={self.tool_calls}, text_content={text_content}, content={self.content}" + ) + # Return None to skip this message (similar to approval messages without tool_calls at line 1998) + return None + google_ai_message = { "role": "model", # NOTE: different } @@ -2003,7 +2182,7 @@ class Message(BaseMessage): elif self.role == "tool": # NOTE: Significantly different tool calling format, more similar to function calling format - # Handle tool returns - similar pattern to Anthropic + # Handle tool returns - Google supports images as sibling inlineData parts if self.tool_returns: parts = [] 
for tool_return in self.tool_returns: @@ -2013,26 +2192,24 @@ class Message(BaseMessage): # Use the function name if available, otherwise use tool_call_id function_name = self.name if self.name else tool_return.tool_call_id - # Truncate the tool return if needed - func_response = truncate_tool_return(tool_return.func_response, tool_return_truncation_chars) + text_content, image_parts = Message._tool_return_to_google_parts( + tool_return.func_response, tool_return_truncation_chars + ) - # NOTE: Google AI API wants the function response as JSON only, no string try: - function_response = parse_json(func_response) + function_response = parse_json(text_content) except: - function_response = {"function_response": func_response} + function_response = {"function_response": text_content} parts.append( { "functionResponse": { "name": function_name, - "response": { - "name": function_name, # NOTE: name twice... why? - "content": function_response, - }, + "response": {"name": function_name, "content": function_response}, } } ) + parts.extend(image_parts) google_ai_message = { "role": "function", @@ -2325,7 +2502,9 @@ class ToolReturn(BaseModel): status: Literal["success", "error"] = Field(..., description="The status of the tool call") stdout: Optional[List[str]] = Field(default=None, description="Captured stdout (e.g. 
prints, logs) from the tool invocation") stderr: Optional[List[str]] = Field(default=None, description="Captured stderr from the tool invocation") - func_response: Optional[str] = Field(None, description="The function response string") + func_response: Optional[Union[str, List[LettaToolReturnContentUnion]]] = Field( + None, description="The function response - either a string or list of content parts (text/image)" + ) class MessageSearchRequest(BaseModel): diff --git a/letta/schemas/model.py b/letta/schemas/model.py index cb2926e9..f5d5fdac 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -42,12 +42,14 @@ class Model(LLMConfig, ModelBase): "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", "bedrock", "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth", ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True) context_window: int = Field( @@ -138,6 +140,7 @@ class Model(LLMConfig, ModelBase): ProviderType.deepseek: DeepseekModelSettings, ProviderType.together: TogetherModelSettings, ProviderType.bedrock: BedrockModelSettings, + ProviderType.openrouter: OpenRouterModelSettings, } settings_class = PROVIDER_SETTINGS_MAP.get(self.provider_type) @@ -456,6 +459,23 @@ class BedrockModelSettings(ModelSettings): } +class OpenRouterModelSettings(ModelSettings): + """OpenRouter model configuration (OpenAI-compatible).""" + + provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.") + temperature: float = Field(0.7, description="The temperature of the model.") + response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") + + def _to_legacy_config_params(self) -> dict: + return { + "temperature": self.temperature, + "max_tokens": self.max_output_tokens, + "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, + 
"strict": False, # OpenRouter does not support strict mode + } + + class ChatGPTOAuthReasoning(BaseModel): """Reasoning configuration for ChatGPT OAuth models (GPT-5.x, o-series).""" @@ -495,6 +515,7 @@ ModelSettingsUnion = Annotated[ DeepseekModelSettings, TogetherModelSettings, BedrockModelSettings, + OpenRouterModelSettings, ChatGPTOAuthModelSettings, ], Field(discriminator="provider_type"), diff --git a/letta/schemas/passage.py b/letta/schemas/passage.py index 39dae6b0..6cb7423b 100644 --- a/letta/schemas/passage.py +++ b/letta/schemas/passage.py @@ -44,7 +44,7 @@ class Passage(PassageBase): embedding: Optional[List[float]] = Field(..., description="The embedding of the passage.") embedding_config: Optional[EmbeddingConfig] = Field(..., description="The embedding configuration used by the passage.") - created_at: datetime = Field(default_factory=get_utc_time, description="The creation date of the passage.") + created_at: Optional[datetime] = Field(default_factory=get_utc_time, description="The creation date of the passage.") @field_validator("embedding", mode="before") @classmethod @@ -83,6 +83,7 @@ class PassageCreate(PassageBase): # optionally provide embeddings embedding: Optional[List[float]] = Field(None, description="The embedding of the passage.") embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the passage.") + created_at: Optional[datetime] = Field(None, description="Optional creation datetime for the passage.") class PassageUpdate(PassageCreate): diff --git a/letta/schemas/provider_trace.py b/letta/schemas/provider_trace.py index 10ca5c3a..0f4202e8 100644 --- a/letta/schemas/provider_trace.py +++ b/letta/schemas/provider_trace.py @@ -29,6 +29,9 @@ class ProviderTrace(BaseProviderTrace): run_id (str): ID of the run this trace is associated with. source (str): Source service that generated this trace (memgpt-server, lettuce-py). organization_id (str): The unique identifier of the organization. 
+ user_id (str): The unique identifier of the user who initiated the request. + compaction_settings (Dict[str, Any]): Compaction/summarization settings (only for summarization calls). + llm_config (Dict[str, Any]): LLM configuration used for this call (only for non-summarization calls). created_at (datetime): The timestamp when the object was created. """ @@ -44,4 +47,30 @@ class ProviderTrace(BaseProviderTrace): run_id: Optional[str] = Field(None, description="ID of the run this trace is associated with") source: Optional[str] = Field(None, description="Source service that generated this trace (memgpt-server, lettuce-py)") + # v2 protocol fields + org_id: Optional[str] = Field(None, description="ID of the organization") + user_id: Optional[str] = Field(None, description="ID of the user who initiated the request") + compaction_settings: Optional[Dict[str, Any]] = Field(None, description="Compaction/summarization settings (summarization calls only)") + llm_config: Optional[Dict[str, Any]] = Field(None, description="LLM configuration used for this call (non-summarization calls only)") + + created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.") + + +class ProviderTraceMetadata(BaseProviderTrace): + """Metadata-only representation of a provider trace (no request/response JSON).""" + + id: str = BaseProviderTrace.generate_id_field() + step_id: Optional[str] = Field(None, description="ID of the step that this trace is associated with") + + # Telemetry context fields + agent_id: Optional[str] = Field(None, description="ID of the agent that generated this trace") + agent_tags: Optional[list[str]] = Field(None, description="Tags associated with the agent for filtering") + call_type: Optional[str] = Field(None, description="Type of call (agent_step, summarization, etc.)") + run_id: Optional[str] = Field(None, description="ID of the run this trace is associated with") + source: Optional[str] = Field(None, 
description="Source service that generated this trace (memgpt-server, lettuce-py)") + + # v2 protocol fields + org_id: Optional[str] = Field(None, description="ID of the organization") + user_id: Optional[str] = Field(None, description="ID of the user who initiated the request") + created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.") diff --git a/letta/schemas/providers/__init__.py b/letta/schemas/providers/__init__.py index 6e3f5187..2790ba7e 100644 --- a/letta/schemas/providers/__init__.py +++ b/letta/schemas/providers/__init__.py @@ -12,10 +12,12 @@ from .google_vertex import GoogleVertexProvider from .groq import GroqProvider from .letta import LettaProvider from .lmstudio import LMStudioOpenAIProvider +from .minimax import MiniMaxProvider from .mistral import MistralProvider from .ollama import OllamaProvider from .openai import OpenAIProvider from .openrouter import OpenRouterProvider +from .sglang import SGLangProvider from .together import TogetherProvider from .vllm import VLLMProvider from .xai import XAIProvider @@ -40,11 +42,13 @@ __all__ = [ "GroqProvider", "LettaProvider", "LMStudioOpenAIProvider", + "MiniMaxProvider", "MistralProvider", "OllamaProvider", "OpenAIProvider", "TogetherProvider", "VLLMProvider", # Replaces ChatCompletions and Completions + "SGLangProvider", "XAIProvider", "ZAIProvider", "OpenRouterProvider", diff --git a/letta/schemas/providers/base.py b/letta/schemas/providers/base.py index 44e63f46..73e4a239 100644 --- a/letta/schemas/providers/base.py +++ b/letta/schemas/providers/base.py @@ -32,6 +32,7 @@ class Provider(ProviderBase): api_version: str | None = Field(None, description="API version used for requests to the provider.") organization_id: str | None = Field(None, description="The organization id of the user") updated_at: datetime | None = Field(None, description="The last update timestamp of the provider.") + last_synced: datetime | None = Field(None, 
description="The last time models were synced for this provider.") # Encrypted fields (stored as Secret objects, serialized to strings for DB) # Secret class handles validation and serialization automatically via __get_pydantic_core_schema__ @@ -191,9 +192,12 @@ class Provider(ProviderBase): GroqProvider, LettaProvider, LMStudioOpenAIProvider, + MiniMaxProvider, MistralProvider, OllamaProvider, OpenAIProvider, + OpenRouterProvider, + SGLangProvider, TogetherProvider, VLLMProvider, XAIProvider, @@ -224,6 +228,8 @@ class Provider(ProviderBase): return OllamaProvider(**self.model_dump(exclude_none=True)) case ProviderType.vllm: return VLLMProvider(**self.model_dump(exclude_none=True)) # Removed support for CompletionsProvider + case ProviderType.sglang: + return SGLangProvider(**self.model_dump(exclude_none=True)) case ProviderType.mistral: return MistralProvider(**self.model_dump(exclude_none=True)) case ProviderType.deepseek: @@ -240,6 +246,10 @@ class Provider(ProviderBase): return LMStudioOpenAIProvider(**self.model_dump(exclude_none=True)) case ProviderType.bedrock: return BedrockProvider(**self.model_dump(exclude_none=True)) + case ProviderType.minimax: + return MiniMaxProvider(**self.model_dump(exclude_none=True)) + case ProviderType.openrouter: + return OpenRouterProvider(**self.model_dump(exclude_none=True)) case _: raise ValueError(f"Unknown provider type: {self.provider_type}") diff --git a/letta/schemas/providers/bedrock.py b/letta/schemas/providers/bedrock.py index 7f833ad8..b226b0fe 100644 --- a/letta/schemas/providers/bedrock.py +++ b/letta/schemas/providers/bedrock.py @@ -18,6 +18,7 @@ logger = get_logger(__name__) class BedrockProvider(Provider): provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.") provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)") + base_url: str = Field("bedrock", description="Identifier for 
Bedrock endpoint (used for model_endpoint)") access_key: str | None = Field(None, description="AWS access key ID for Bedrock") api_key: str | None = Field(None, description="AWS secret access key for Bedrock") region: str = Field(..., description="AWS region for Bedrock") @@ -99,7 +100,7 @@ class BedrockProvider(Provider): LLMConfig( model=model_name, model_endpoint_type=self.provider_type.value, - model_endpoint=None, + model_endpoint="bedrock", context_window=self.get_model_context_window(inference_profile_id), # Store the full inference profile ID in the handle for API calls handle=self.get_handle(inference_profile_id), diff --git a/letta/schemas/providers/minimax.py b/letta/schemas/providers/minimax.py new file mode 100644 index 00000000..488c578e --- /dev/null +++ b/letta/schemas/providers/minimax.py @@ -0,0 +1,105 @@ +from typing import Literal + +import anthropic +from pydantic import Field + +from letta.errors import ErrorCode, LLMAuthenticationError, LLMError +from letta.log import get_logger +from letta.schemas.enums import ProviderCategory, ProviderType +from letta.schemas.llm_config import LLMConfig +from letta.schemas.providers.base import Provider + +logger = get_logger(__name__) + +# MiniMax model specifications from official documentation +# https://platform.minimax.io/docs/guides/models-intro +MODEL_LIST = [ + { + "name": "MiniMax-M2.1", + "context_window": 200000, + "max_output": 128000, + "description": "Polyglot code mastery, precision code refactoring (~60 tps)", + }, + { + "name": "MiniMax-M2.1-lightning", + "context_window": 200000, + "max_output": 128000, + "description": "Same performance as M2.1, significantly faster (~100 tps)", + }, + { + "name": "MiniMax-M2", + "context_window": 200000, + "max_output": 128000, + "description": "Agentic capabilities, advanced reasoning", + }, +] + + +class MiniMaxProvider(Provider): + """ + MiniMax provider using Anthropic-compatible API. 
+ + MiniMax models support native interleaved thinking without requiring beta headers. + The API uses the standard messages endpoint (not beta). + + Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api + """ + + provider_type: Literal[ProviderType.minimax] = Field(ProviderType.minimax, description="The type of the provider.") + provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)") + api_key: str | None = Field(None, description="API key for the MiniMax API.", deprecated=True) + base_url: str = Field("https://api.minimax.io/anthropic", description="Base URL for the MiniMax Anthropic-compatible API.") + + async def check_api_key(self): + """Check if the API key is valid by making a test request to the MiniMax API.""" + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + if not api_key: + raise ValueError("No API key provided") + + try: + # Use async Anthropic client pointed at MiniMax's Anthropic-compatible endpoint + client = anthropic.AsyncAnthropic(api_key=api_key, base_url=self.base_url) + # Use count_tokens as a lightweight check - similar to Anthropic provider + await client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}]) + except anthropic.AuthenticationError as e: + raise LLMAuthenticationError(message=f"Failed to authenticate with MiniMax: {e}", code=ErrorCode.UNAUTHENTICATED) + except Exception as e: + raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) + + def get_default_max_output_tokens(self, model_name: str) -> int: + """Get the default max output tokens for MiniMax models.""" + # All MiniMax models support 128K output tokens + return 128000 + + def get_model_context_window_size(self, model_name: str) -> int | None: + """Get the context window size for a MiniMax model.""" + # All current MiniMax models have 200K context window + for model in MODEL_LIST: + 
if model["name"] == model_name: + return model["context_window"] + # Default fallback + return 200000 + + async def list_llm_models_async(self) -> list[LLMConfig]: + """ + Return available MiniMax models. + + MiniMax doesn't have a models listing endpoint, so we use a hardcoded list. + """ + configs = [] + for model in MODEL_LIST: + configs.append( + LLMConfig( + model=model["name"], + model_endpoint_type="minimax", + model_endpoint=self.base_url, + context_window=model["context_window"], + handle=self.get_handle(model["name"]), + max_tokens=model["max_output"], + # MiniMax models support native thinking, similar to Claude's extended thinking + put_inner_thoughts_in_kwargs=True, + provider_name=self.name, + provider_category=self.provider_category, + ) + ) + return configs diff --git a/letta/schemas/providers/openai.py b/letta/schemas/providers/openai.py index 1bafb34f..a1bdbb26 100644 --- a/letta/schemas/providers/openai.py +++ b/letta/schemas/providers/openai.py @@ -42,22 +42,37 @@ class OpenAIProvider(Provider): raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) def get_default_max_output_tokens(self, model_name: str) -> int: - """Get the default max output tokens for OpenAI models.""" - if model_name.startswith("gpt-5"): - return 16384 - elif model_name.startswith("o1") or model_name.startswith("o3"): - return 100000 - return 16384 # default for openai + """Get the default max output tokens for OpenAI models (sync fallback).""" + # Simple default for openai + return 16384 + + async def get_default_max_output_tokens_async(self, model_name: str) -> int: + """Get the default max output tokens for OpenAI models. + + Uses litellm model specifications with a simple fallback. 
+ """ + from letta.model_specs.litellm_model_specs import get_max_output_tokens + + # Try litellm specs + max_output = await get_max_output_tokens(model_name) + if max_output is not None: + return max_output + + # Simple default for openai + return 16384 async def _get_models_async(self) -> list[dict]: from letta.llm_api.openai import openai_get_model_list_async - # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)... - # See: https://openrouter.ai/docs/requests - extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None - - # Similar to Nebius - extra_params = {"verbose": True} if "nebius.com" in self.base_url else None + # Provider-specific extra parameters for model listing + extra_params = None + if "openrouter.ai" in self.base_url: + # OpenRouter: filter for models with tool calling support + # See: https://openrouter.ai/docs/requests + extra_params = {"supported_parameters": "tools"} + elif "nebius.com" in self.base_url: + # Nebius: use verbose mode for better model info + extra_params = {"verbose": True} # Decrypt API key before using api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None @@ -76,7 +91,7 @@ class OpenAIProvider(Provider): async def list_llm_models_async(self) -> list[LLMConfig]: data = await self._get_models_async() - return self._list_llm_models(data) + return await self._list_llm_models(data) async def list_embedding_models_async(self) -> list[EmbeddingConfig]: """Return known OpenAI embedding models. @@ -116,13 +131,13 @@ class OpenAIProvider(Provider): ), ] - def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]: + async def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]: """ This handles filtering out LLM Models by provider that meet Letta's requirements. 
""" configs = [] for model in data: - check = self._do_model_checks_for_name_and_context_size(model) + check = await self._do_model_checks_for_name_and_context_size_async(model) if check is None: continue model_name, context_window_size = check @@ -174,7 +189,7 @@ class OpenAIProvider(Provider): model_endpoint=self.base_url, context_window=context_window_size, handle=handle, - max_tokens=self.get_default_max_output_tokens(model_name), + max_tokens=await self.get_default_max_output_tokens_async(model_name), provider_name=self.name, provider_category=self.provider_category, ) @@ -188,12 +203,30 @@ class OpenAIProvider(Provider): return configs def _do_model_checks_for_name_and_context_size(self, model: dict, length_key: str = "context_length") -> tuple[str, int] | None: + """Sync version - uses sync get_model_context_window_size (for subclasses with hardcoded values).""" if "id" not in model: logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model) return None model_name = model["id"] - context_window_size = model.get(length_key) or self.get_model_context_window_size(model_name) + context_window_size = self.get_model_context_window_size(model_name) + + if not context_window_size: + logger.info("No context window size found for model: %s", model_name) + return None + + return model_name, context_window_size + + async def _do_model_checks_for_name_and_context_size_async( + self, model: dict, length_key: str = "context_length" + ) -> tuple[str, int] | None: + """Async version - uses async get_model_context_window_size_async (for litellm lookup).""" + if "id" not in model: + logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model) + return None + + model_name = model["id"] + context_window_size = await self.get_model_context_window_size_async(model_name) if not context_window_size: logger.info("No context window size found for model: %s", model_name) @@ -211,19 +244,30 @@ class 
OpenAIProvider(Provider): return llm_config def get_model_context_window_size(self, model_name: str) -> int | None: - if model_name in LLM_MAX_CONTEXT_WINDOW: - return LLM_MAX_CONTEXT_WINDOW[model_name] - else: - logger.debug( - "Model %s on %s for provider %s not found in LLM_MAX_CONTEXT_WINDOW. Using default of {LLM_MAX_CONTEXT_WINDOW['DEFAULT']}", - model_name, - self.base_url, - self.__class__.__name__, - ) - return LLM_MAX_CONTEXT_WINDOW["DEFAULT"] + """Get the context window size for a model (sync fallback).""" + return LLM_MAX_CONTEXT_WINDOW["DEFAULT"] + + async def get_model_context_window_size_async(self, model_name: str) -> int | None: + """Get the context window size for a model. + + Uses litellm model specifications which covers all OpenAI models. + """ + from letta.model_specs.litellm_model_specs import get_context_window + + context_window = await get_context_window(model_name) + if context_window is not None: + return context_window + + # Simple fallback + logger.debug( + "Model %s not found in litellm specs. 
Using default of %s", + model_name, + LLM_MAX_CONTEXT_WINDOW["DEFAULT"], + ) + return LLM_MAX_CONTEXT_WINDOW["DEFAULT"] def get_model_context_window(self, model_name: str) -> int | None: return self.get_model_context_window_size(model_name) async def get_model_context_window_async(self, model_name: str) -> int | None: - return self.get_model_context_window_size(model_name) + return await self.get_model_context_window_size_async(model_name) diff --git a/letta/schemas/providers/openrouter.py b/letta/schemas/providers/openrouter.py index 4423b0d5..7f1ba419 100644 --- a/letta/schemas/providers/openrouter.py +++ b/letta/schemas/providers/openrouter.py @@ -1,52 +1,106 @@ from typing import Literal +from openai import AsyncOpenAI, AuthenticationError from pydantic import Field -from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW +from letta.errors import ErrorCode, LLMAuthenticationError, LLMError from letta.log import get_logger -from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import ProviderCategory, ProviderType from letta.schemas.llm_config import LLMConfig from letta.schemas.providers.openai import OpenAIProvider logger = get_logger(__name__) -# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"} -# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"} -# DEFAULT_EMBEDDING_BATCH_SIZE = 1024 +# Default context window for models not in the API response +DEFAULT_CONTEXT_WINDOW = 128000 class OpenRouterProvider(OpenAIProvider): - provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.") + """ + OpenRouter provider - https://openrouter.ai/ + + OpenRouter is an OpenAI-compatible API gateway that provides access to + multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API. 
+ """ + + provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.") provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)") api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True) base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.") - def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]: + async def check_api_key(self): + """Check if the API key is valid by making a test request to the OpenRouter API.""" + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + if not api_key: + raise ValueError("No API key provided") + + try: + # Use async OpenAI client pointed at OpenRouter's endpoint + client = AsyncOpenAI(api_key=api_key, base_url=self.base_url) + # Just list models to verify API key works + await client.models.list() + except AuthenticationError as e: + raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED) + except Exception as e: + raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) + + def get_model_context_window_size(self, model_name: str) -> int | None: + """Get the context window size for an OpenRouter model. + + OpenRouter models provide context_length in the API response, + so this is mainly a fallback. """ - This handles filtering out LLM Models by provider that meet Letta's requirements. + return DEFAULT_CONTEXT_WINDOW + + async def list_llm_models_async(self) -> list[LLMConfig]: """ + Return available OpenRouter models that support tool calling. + + OpenRouter provides a models endpoint that supports filtering by supported_parameters. + We filter for models that support 'tools' to ensure Letta compatibility. 
+ """ + from letta.llm_api.openai import openai_get_model_list_async + + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + + # OpenRouter supports filtering models by supported parameters + # See: https://openrouter.ai/docs/requests + extra_params = {"supported_parameters": "tools"} + + response = await openai_get_model_list_async( + self.base_url, + api_key=api_key, + extra_params=extra_params, + ) + + data = response.get("data", response) + configs = [] for model in data: - check = self._do_model_checks_for_name_and_context_size(model) - if check is None: + if "id" not in model: + logger.warning(f"OpenRouter model missing 'id' field: {model}") continue - model_name, context_window_size = check - handle = self.get_handle(model_name) + model_name = model["id"] - config = LLMConfig( - model=model_name, - model_endpoint_type="openai", - model_endpoint=self.base_url, - context_window=context_window_size, - handle=handle, - max_tokens=self.get_default_max_output_tokens(model_name), - provider_name=self.name, - provider_category=self.provider_category, + # OpenRouter returns context_length in the model listing + if "context_length" in model and model["context_length"]: + context_window_size = model["context_length"] + else: + context_window_size = self.get_model_context_window_size(model_name) + logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}") + + configs.append( + LLMConfig( + model=model_name, + model_endpoint_type="openrouter", + model_endpoint=self.base_url, + context_window=context_window_size, + handle=self.get_handle(model_name), + max_tokens=self.get_default_max_output_tokens(model_name), + provider_name=self.name, + provider_category=self.provider_category, + ) ) - config = self._set_model_parameter_tuned_defaults(model_name, config) - configs.append(config) - return configs diff --git a/letta/schemas/providers/sglang.py b/letta/schemas/providers/sglang.py new file mode 100644 
index 00000000..657c2e38 --- /dev/null +++ b/letta/schemas/providers/sglang.py @@ -0,0 +1,80 @@ +""" +SGLang provider for Letta. + +SGLang is a high-performance inference engine that exposes OpenAI-compatible API endpoints. +""" + +from typing import Literal + +from pydantic import Field + +from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.enums import ProviderCategory, ProviderType +from letta.schemas.llm_config import LLMConfig +from letta.schemas.providers.base import Provider + + +class SGLangProvider(Provider): + provider_type: Literal[ProviderType.sglang] = Field( + ProviderType.sglang, + description="The type of the provider." + ) + provider_category: ProviderCategory = Field( + ProviderCategory.base, + description="The category of the provider (base or byok)" + ) + base_url: str = Field( + ..., + description="Base URL for the SGLang API (e.g., http://localhost:30000)." + ) + api_key: str | None = Field( + None, + description="API key for the SGLang API (optional for local instances)." + ) + default_prompt_formatter: str | None = Field( + default=None, + description="Default prompt formatter (aka model wrapper)." + ) + handle_base: str | None = Field( + None, + description="Custom handle base name for model handles." 
+ ) + + async def list_llm_models_async(self) -> list[LLMConfig]: + from letta.llm_api.openai import openai_get_model_list_async + + # Ensure base_url ends with /v1 (SGLang uses same convention as vLLM) + base_url = self.base_url.rstrip("/") + if not base_url.endswith("/v1"): + base_url = base_url + "/v1" + + # Decrypt API key before using (may be None for local instances) + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + + response = await openai_get_model_list_async(base_url, api_key=api_key) + data = response.get("data", response) + + configs = [] + + for model in data: + model_name = model["id"] + + configs.append( + LLMConfig( + model=model_name, + model_endpoint_type="openai", # SGLang is OpenAI-compatible + model_endpoint=base_url, + model_wrapper=self.default_prompt_formatter, + context_window=model.get("max_model_len", 8192), + handle=self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name), + max_tokens=self.get_default_max_output_tokens(model_name), + provider_name=self.name, + provider_category=self.provider_category, + ) + ) + + return configs + + async def list_embedding_models_async(self) -> list[EmbeddingConfig]: + # SGLang embedding support not common for training use cases + return [] diff --git a/letta/schemas/usage.py b/letta/schemas/usage.py index da015e77..d2f5191d 100644 --- a/letta/schemas/usage.py +++ b/letta/schemas/usage.py @@ -126,3 +126,53 @@ class LettaUsageStatistics(BaseModel): reasoning_tokens: Optional[int] = Field( None, description="The number of reasoning/thinking tokens generated. None if not reported by provider." ) + + def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics": + """Convert to UsageStatistics (OpenAI-compatible format). + + Args: + provider_type: ProviderType enum indicating which provider format to use. + Used to determine which cache field to populate. 
+ + Returns: + UsageStatistics object with nested prompt/completion token details. + """ + from letta.schemas.enums import ProviderType + from letta.schemas.openai.chat_completion_response import ( + UsageStatistics, + UsageStatisticsCompletionTokenDetails, + UsageStatisticsPromptTokenDetails, + ) + + # Providers that use Anthropic-style cache fields (cache_read_tokens, cache_creation_tokens) + anthropic_style_providers = {ProviderType.anthropic, ProviderType.bedrock} + + # Build prompt_tokens_details if we have cache data + prompt_tokens_details = None + if self.cached_input_tokens is not None or self.cache_write_tokens is not None: + if provider_type in anthropic_style_providers: + # Anthropic uses cache_read_tokens and cache_creation_tokens + prompt_tokens_details = UsageStatisticsPromptTokenDetails( + cache_read_tokens=self.cached_input_tokens, + cache_creation_tokens=self.cache_write_tokens, + ) + else: + # OpenAI/Gemini use cached_tokens + prompt_tokens_details = UsageStatisticsPromptTokenDetails( + cached_tokens=self.cached_input_tokens, + ) + + # Build completion_tokens_details if we have reasoning tokens + completion_tokens_details = None + if self.reasoning_tokens is not None: + completion_tokens_details = UsageStatisticsCompletionTokenDetails( + reasoning_tokens=self.reasoning_tokens, + ) + + return UsageStatistics( + prompt_tokens=self.prompt_tokens, + completion_tokens=self.completion_tokens, + total_tokens=self.total_tokens, + prompt_tokens_details=prompt_tokens_details, + completion_tokens_details=completion_tokens_details, + ) diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py index d56fb4f2..19bdb656 100644 --- a/letta/server/rest_api/app.py +++ b/letta/server/rest_api/app.py @@ -38,6 +38,7 @@ from letta.errors import ( HandleNotFoundError, LettaAgentNotFoundError, LettaExpiredError, + LettaImageFetchError, LettaInvalidArgumentError, LettaInvalidMCPSchemaError, LettaMCPConnectionError, @@ -64,6 +65,7 @@ from 
letta.schemas.letta_message import create_letta_error_message_schema, creat from letta.schemas.letta_message_content import ( create_letta_assistant_message_content_union_schema, create_letta_message_content_union_schema, + create_letta_tool_return_content_union_schema, create_letta_user_message_content_union_schema, ) from letta.server.constants import REST_DEFAULT_PORT @@ -105,6 +107,7 @@ def generate_openapi_schema(app: FastAPI): letta_docs["components"]["schemas"]["LettaMessageUnion"] = create_letta_message_union_schema() letta_docs["components"]["schemas"]["LettaMessageContentUnion"] = create_letta_message_content_union_schema() letta_docs["components"]["schemas"]["LettaAssistantMessageContentUnion"] = create_letta_assistant_message_content_union_schema() + letta_docs["components"]["schemas"]["LettaToolReturnContentUnion"] = create_letta_tool_return_content_union_schema() letta_docs["components"]["schemas"]["LettaUserMessageContentUnion"] = create_letta_user_message_content_union_schema() letta_docs["components"]["schemas"]["LettaErrorMessage"] = create_letta_error_message_schema() @@ -163,10 +166,18 @@ async def lifespan(app_: FastAPI): except Exception as e: logger.warning(f"[Worker {worker_id}] Failed to download NLTK data: {e}") - # logger.info(f"[Worker {worker_id}] Starting lifespan initialization") - # logger.info(f"[Worker {worker_id}] Initializing database connections") - # db_registry.initialize_async() - # logger.info(f"[Worker {worker_id}] Database connections initialized") + # Log effective database timeout settings for debugging + try: + from sqlalchemy import text + + from letta.server.db import db_registry + + async with db_registry.async_session() as session: + result = await session.execute(text("SHOW statement_timeout")) + statement_timeout = result.scalar() + logger.warning(f"[Worker {worker_id}] PostgreSQL statement_timeout: {statement_timeout}") + except Exception as e: + logger.warning(f"[Worker {worker_id}] Failed to query 
statement_timeout: {e}") if should_use_pinecone(): if settings.upsert_pinecone_indices: @@ -180,7 +191,7 @@ async def lifespan(app_: FastAPI): logger.info(f"[Worker {worker_id}] Starting scheduler with leader election") global server - await server.init_async() + await server.init_async(init_with_default_org_and_user=not settings.no_default_actor) try: await start_scheduler_with_leader_election(server) logger.info(f"[Worker {worker_id}] Scheduler initialization completed") @@ -475,6 +486,7 @@ def create_application() -> "FastAPI": app.add_exception_handler(LettaToolNameConflictError, _error_handler_400) app.add_exception_handler(AgentFileImportError, _error_handler_400) app.add_exception_handler(EmbeddingConfigRequiredError, _error_handler_400) + app.add_exception_handler(LettaImageFetchError, _error_handler_400) app.add_exception_handler(ValueError, _error_handler_400) # 404 Not Found errors diff --git a/letta/server/rest_api/dependencies.py b/letta/server/rest_api/dependencies.py index 1cc1d02f..b6f6b6cc 100644 --- a/letta/server/rest_api/dependencies.py +++ b/letta/server/rest_api/dependencies.py @@ -3,7 +3,10 @@ from typing import TYPE_CHECKING, Optional from fastapi import Header from pydantic import BaseModel +from letta.errors import LettaInvalidArgumentError from letta.otel.tracing import tracer +from letta.schemas.enums import PrimitiveType +from letta.validators import PRIMITIVE_ID_PATTERNS if TYPE_CHECKING: from letta.server.server import SyncServer @@ -42,6 +45,12 @@ def get_headers( ) -> HeaderParams: """Dependency injection function to extract common headers from requests.""" with tracer.start_as_current_span("dependency.get_headers"): + if actor_id is not None and PRIMITIVE_ID_PATTERNS[PrimitiveType.USER.value].match(actor_id) is None: + raise LettaInvalidArgumentError( + message=(f"Invalid user ID format: {actor_id}. 
Expected format: '{PrimitiveType.USER.value}-'"), + argument_name="user_id", + ) + return HeaderParams( actor_id=actor_id, user_agent=user_agent, diff --git a/letta/server/rest_api/redis_stream_manager.py b/letta/server/rest_api/redis_stream_manager.py index 0b56c4c6..f5c96168 100644 --- a/letta/server/rest_api/redis_stream_manager.py +++ b/letta/server/rest_api/redis_stream_manager.py @@ -239,11 +239,11 @@ async def create_background_stream_processor( if isinstance(chunk, tuple): chunk = chunk[0] - # Track terminal events + # Track terminal events (check at line start to avoid false positives in message content) if isinstance(chunk, str): - if "data: [DONE]" in chunk: + if "\ndata: [DONE]" in chunk or chunk.startswith("data: [DONE]"): saw_done = True - if "event: error" in chunk: + if "\nevent: error" in chunk or chunk.startswith("event: error"): saw_error = True # Best-effort extraction of the error payload so we can persist it on the run. diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 031feaa8..d480b963 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -308,6 +308,7 @@ async def _import_agent( strip_messages: bool = False, env_vars: Optional[dict[str, Any]] = None, override_embedding_handle: Optional[str] = None, + override_model_handle: Optional[str] = None, ) -> List[str]: """ Import an agent using the new AgentFileSchema format. 
@@ -319,6 +320,11 @@ async def _import_agent( else: embedding_config_override = None + if override_model_handle: + llm_config_override = await server.get_llm_config_from_handle_async(actor=actor, handle=override_model_handle) + else: + llm_config_override = None + import_result = await server.agent_serialization_manager.import_file( schema=agent_schema, actor=actor, @@ -327,6 +333,7 @@ async def _import_agent( override_existing_tools=override_existing_tools, env_vars=env_vars, override_embedding_config=embedding_config_override, + override_llm_config=llm_config_override, project_id=project_id, ) @@ -362,6 +369,10 @@ async def import_agent( None, description="Embedding handle to override with.", ), + model: Optional[str] = Form( + None, + description="Model handle to override the agent's default model. This allows the imported agent to use a different model while keeping other defaults (e.g., context size) from the original configuration.", + ), # Deprecated fields (maintain backward compatibility) append_copy_suffix: bool = Form( True, @@ -378,6 +389,11 @@ async def import_agent( description="Override import with specific embedding handle. Use 'embedding' instead.", deprecated=True, ), + override_model_handle: Optional[str] = Form( + None, + description="Model handle to override the agent's default model. Use 'model' instead.", + deprecated=True, + ), project_id: str | None = Form( None, description="The project ID to associate the uploaded agent with. 
This is now passed via headers.", deprecated=True ), @@ -408,6 +424,7 @@ async def import_agent( # Handle backward compatibility: prefer new field names over deprecated ones final_name = name or override_name final_embedding_handle = embedding or override_embedding_handle or x_override_embedding_model + final_model_handle = model or override_model_handle # Parse secrets (new) or env_vars_json (deprecated) env_vars = None @@ -440,6 +457,7 @@ async def import_agent( strip_messages=strip_messages, env_vars=env_vars, override_embedding_handle=final_embedding_handle, + override_model_handle=final_model_handle, ) else: # This is a legacy AgentSchema @@ -628,7 +646,9 @@ async def run_tool_for_agent( # Get agent with all relationships agent = await server.agent_manager.get_agent_by_id_async( - agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"] + agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) # Find the tool by name among attached tools @@ -701,7 +721,7 @@ async def attach_source( await server.agent_manager.insert_files_into_context_window(agent_state=agent_state, file_metadata_with_content=files, actor=actor) if agent_state.enable_sleeptime: - source = await server.source_manager.get_source_by_id(source_id=source_id) + source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor) safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async") return agent_state @@ -728,7 +748,7 @@ async def attach_folder_to_agent( await server.agent_manager.insert_files_into_context_window(agent_state=agent_state, file_metadata_with_content=files, actor=actor) if agent_state.enable_sleeptime: - source = await server.source_manager.get_source_by_id(source_id=folder_id) + source = await 
server.source_manager.get_source_by_id(source_id=folder_id, actor=actor) safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async") if is_1_0_sdk_version(headers): @@ -759,7 +779,7 @@ async def detach_source( if agent_state.enable_sleeptime: try: - source = await server.source_manager.get_source_by_id(source_id=source_id) + source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor) block = await server.agent_manager.get_block_with_label_async(agent_id=agent_state.id, block_label=source.name, actor=actor) await server.block_manager.delete_block_async(block.id, actor) except: @@ -791,7 +811,7 @@ async def detach_folder_from_agent( if agent_state.enable_sleeptime: try: - source = await server.source_manager.get_source_by_id(source_id=folder_id) + source = await server.source_manager.get_source_by_id(source_id=folder_id, actor=actor) block = await server.agent_manager.get_block_with_label_async(agent_id=agent_state.id, block_label=source.name, actor=actor) await server.block_manager.delete_block_async(block.id, actor) except: @@ -1256,7 +1276,7 @@ async def detach_identity_from_agent( return None -@router.get("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="list_passages", deprecated=True) +@router.get("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="list_passages") async def list_passages( agent_id: AgentId, server: "SyncServer" = Depends(get_letta_server), @@ -1285,7 +1305,7 @@ async def list_passages( ) -@router.post("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="create_passage", deprecated=True) +@router.post("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="create_passage") async def create_passage( agent_id: AgentId, request: CreateArchivalMemory = Body(...), @@ -1306,7 +1326,6 @@ async def create_passage( "/{agent_id}/archival-memory/search", 
response_model=ArchivalMemorySearchResponse, operation_id="search_archival_memory", - deprecated=True, ) async def search_archival_memory( agent_id: AgentId, @@ -1354,7 +1373,7 @@ async def search_archival_memory( # TODO(ethan): query or path parameter for memory_id? # @router.delete("/{agent_id}/archival") -@router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage", deprecated=True) +@router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage") async def delete_passage( memory_id: str, agent_id: AgentId, @@ -1520,7 +1539,9 @@ async def send_message( MetricRegistry().user_message_counter.add(1, get_ctx_attributes()) # TODO: This is redundant, remove soon agent = await server.agent_manager.get_agent_by_id_async( - agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"] + agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) # Handle model override if specified in the request @@ -1799,7 +1820,9 @@ async def _process_message_background( try: agent = await server.agent_manager.get_agent_by_id_async( - agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"] + agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) # Handle model override if specified @@ -1853,15 +1876,24 @@ async def _process_message_background( runs_manager = RunManager() from letta.schemas.enums import RunStatus + from letta.schemas.letta_stop_reason import StopReasonType - if result.stop_reason.stop_reason == "cancelled": + # Handle cases where stop_reason might be None (defensive) + if result.stop_reason and result.stop_reason.stop_reason == "cancelled": run_status = RunStatus.cancelled - else: + 
stop_reason = result.stop_reason.stop_reason + elif result.stop_reason: run_status = RunStatus.completed + stop_reason = result.stop_reason.stop_reason + else: + # Fallback: no stop_reason set (shouldn't happen but defensive) + logger.error(f"Run {run_id} completed without stop_reason in result, defaulting to end_turn") + run_status = RunStatus.completed + stop_reason = StopReasonType.end_turn await runs_manager.update_run_by_id_async( run_id=run_id, - update=RunUpdate(status=run_status, stop_reason=result.stop_reason.stop_reason), + update=RunUpdate(status=run_status, stop_reason=stop_reason), actor=actor, ) @@ -1869,20 +1901,22 @@ async def _process_message_background( # Update run status to failed with specific error info runs_manager = RunManager() from letta.schemas.enums import RunStatus + from letta.schemas.letta_stop_reason import StopReasonType await runs_manager.update_run_by_id_async( run_id=run_id, - update=RunUpdate(status=RunStatus.failed, metadata={"error": str(e)}), + update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": str(e)}), actor=actor, ) except Exception as e: # Update run status to failed runs_manager = RunManager() from letta.schemas.enums import RunStatus + from letta.schemas.letta_stop_reason import StopReasonType await runs_manager.update_run_by_id_async( run_id=run_id, - update=RunUpdate(status=RunStatus.failed, metadata={"error": str(e)}), + update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": str(e)}), actor=actor, ) finally: @@ -1966,7 +2000,9 @@ async def send_message_async( if use_lettuce: agent_state = await server.agent_manager.get_agent_by_id_async( - agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"] + agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) # Allow V1 agents only if the 
message async flag is enabled is_v1_message_async_enabled = ( @@ -2020,10 +2056,11 @@ async def send_message_async( async def update_failed_run(): runs_manager = RunManager() from letta.schemas.enums import RunStatus + from letta.schemas.letta_stop_reason import StopReasonType await runs_manager.update_run_by_id_async( run_id=run.id, - update=RunUpdate(status=RunStatus.failed, metadata={"error": error_str}), + update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": error_str}), actor=actor, ) diff --git a/letta/server/rest_api/routers/v1/archives.py b/letta/server/rest_api/routers/v1/archives.py index c109f1c8..1313bf8d 100644 --- a/letta/server/rest_api/routers/v1/archives.py +++ b/letta/server/rest_api/routers/v1/archives.py @@ -48,6 +48,13 @@ class PassageCreateRequest(BaseModel): text: str = Field(..., description="The text content of the passage") metadata: Optional[Dict] = Field(default=None, description="Optional metadata for the passage") tags: Optional[List[str]] = Field(default=None, description="Optional tags for categorizing the passage") + created_at: Optional[str] = Field(default=None, description="Optional creation datetime for the passage (ISO 8601 format)") + + +class PassageBatchCreateRequest(BaseModel): + """Request model for creating multiple passages in an archive.""" + + passages: List[PassageCreateRequest] = Field(..., description="Passages to create in the archive") @router.post("/", response_model=PydanticArchive, operation_id="create_archive") @@ -65,16 +72,14 @@ async def create_archive( if embedding_config is None: embedding_handle = archive.embedding if embedding_handle is None: - if settings.default_embedding_handle is None: - raise LettaInvalidArgumentError( - "Must specify either embedding or embedding_config in request", argument_name="default_embedding_handle" - ) - else: - embedding_handle = settings.default_embedding_handle - embedding_config = await 
server.get_embedding_config_from_handle_async( - handle=embedding_handle, - actor=actor, - ) + embedding_handle = settings.default_embedding_handle + # Only resolve embedding config if we have an embedding handle + if embedding_handle is not None: + embedding_config = await server.get_embedding_config_from_handle_async( + handle=embedding_handle, + actor=actor, + ) + # Otherwise, embedding_config remains None (text search only) return await server.archive_manager.create_archive_async( name=archive.name, @@ -227,6 +232,27 @@ async def create_passage_in_archive( text=passage.text, metadata=passage.metadata, tags=passage.tags, + created_at=passage.created_at, + actor=actor, + ) + + +@router.post("/{archive_id}/passages/batch", response_model=List[Passage], operation_id="create_passages_in_archive") +async def create_passages_in_archive( + archive_id: ArchiveId, + payload: PassageBatchCreateRequest = Body(...), + server: "SyncServer" = Depends(get_letta_server), + headers: HeaderParams = Depends(get_headers), +): + """ + Create multiple passages in an archive. + + This adds passages to the archive and creates embeddings for vector storage. 
+ """ + actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id) + return await server.archive_manager.create_passages_in_archive_async( + archive_id=archive_id, + passages=[passage.model_dump() for passage in payload.passages], actor=actor, ) diff --git a/letta/server/rest_api/routers/v1/conversations.py b/letta/server/rest_api/routers/v1/conversations.py index 9cbba747..88dff474 100644 --- a/letta/server/rest_api/routers/v1/conversations.py +++ b/letta/server/rest_api/routers/v1/conversations.py @@ -5,16 +5,20 @@ from fastapi import APIRouter, Body, Depends, HTTPException, Query, status from pydantic import BaseModel, Field from starlette.responses import StreamingResponse +from letta.agents.agent_loop import AgentLoop from letta.agents.letta_agent_v3 import LettaAgentV3 +from letta.constants import REDIS_RUN_ID_PREFIX from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client from letta.errors import LettaExpiredError, LettaInvalidArgumentError, NoActiveRunsToCancelError from letta.helpers.datetime_helpers import get_utc_time from letta.log import get_logger from letta.schemas.conversation import Conversation, CreateConversation, UpdateConversation from letta.schemas.enums import RunStatus +from letta.schemas.job import LettaRequestConfig from letta.schemas.letta_message import LettaMessageUnion -from letta.schemas.letta_request import LettaStreamingRequest, RetrieveStreamRequest +from letta.schemas.letta_request import ConversationMessageRequest, LettaStreamingRequest, RetrieveStreamRequest from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse +from letta.schemas.run import Run as PydanticRun from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator from letta.server.rest_api.streaming_response import ( @@ -60,6 +64,7 @@ async def list_conversations( agent_id: str = 
Query(..., description="The agent ID to list conversations for"), limit: int = Query(50, description="Maximum number of conversations to return"), after: Optional[str] = Query(None, description="Cursor for pagination (conversation ID)"), + summary_search: Optional[str] = Query(None, description="Search for text within conversation summaries"), server: SyncServer = Depends(get_letta_server), headers: HeaderParams = Depends(get_headers), ): @@ -70,6 +75,7 @@ async def list_conversations( actor=actor, limit=limit, after=after, + summary_search=summary_search, ) @@ -154,51 +160,112 @@ async def list_conversation_messages( @router.post( "/{conversation_id}/messages", - response_model=LettaStreamingResponse, + response_model=LettaResponse, operation_id="send_conversation_message", responses={ 200: { "description": "Successful response", "content": { - "text/event-stream": {"description": "Server-Sent Events stream"}, + "text/event-stream": {"description": "Server-Sent Events stream (default, when streaming=true)"}, + "application/json": {"description": "JSON response (when streaming=false)"}, }, } }, ) async def send_conversation_message( conversation_id: ConversationId, - request: LettaStreamingRequest = Body(...), + request: ConversationMessageRequest = Body(...), server: SyncServer = Depends(get_letta_server), headers: HeaderParams = Depends(get_headers), ) -> StreamingResponse | LettaResponse: """ - Send a message to a conversation and get a streaming response. + Send a message to a conversation and get a response. - This endpoint sends a message to an existing conversation and streams - the agent's response back. + This endpoint sends a message to an existing conversation. + By default (streaming=true), returns a streaming response (Server-Sent Events). + Set streaming=false to get a complete JSON response. 
""" actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id) - # Get the conversation to find the agent_id + if not request.messages or len(request.messages) == 0: + raise HTTPException(status_code=422, detail="Messages must not be empty") + conversation = await conversation_manager.get_conversation_by_id( conversation_id=conversation_id, actor=actor, ) - # Force streaming mode for this endpoint - request.streaming = True + # Streaming mode (default) + if request.streaming: + # Convert to LettaStreamingRequest for StreamingService compatibility + streaming_request = LettaStreamingRequest( + messages=request.messages, + streaming=True, + stream_tokens=request.stream_tokens, + include_pings=request.include_pings, + background=request.background, + max_steps=request.max_steps, + use_assistant_message=request.use_assistant_message, + assistant_message_tool_name=request.assistant_message_tool_name, + assistant_message_tool_kwarg=request.assistant_message_tool_kwarg, + include_return_message_types=request.include_return_message_types, + override_model=request.override_model, + client_tools=request.client_tools, + ) + streaming_service = StreamingService(server) + run, result = await streaming_service.create_agent_stream( + agent_id=conversation.agent_id, + actor=actor, + request=streaming_request, + run_type="send_conversation_message", + conversation_id=conversation_id, + ) + return result - # Use streaming service - streaming_service = StreamingService(server) - run, result = await streaming_service.create_agent_stream( - agent_id=conversation.agent_id, - actor=actor, - request=request, - run_type="send_conversation_message", - conversation_id=conversation_id, + # Non-streaming mode + agent = await server.agent_manager.get_agent_by_id_async( + conversation.agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) - return result + if request.override_model: 
+ override_llm_config = await server.get_llm_config_from_handle_async( + actor=actor, + handle=request.override_model, + ) + agent = agent.model_copy(update={"llm_config": override_llm_config}) + + # Create a run for execution tracking + run = None + if settings.track_agent_run: + runs_manager = RunManager() + run = await runs_manager.create_run( + pydantic_run=PydanticRun( + agent_id=conversation.agent_id, + background=False, + metadata={ + "run_type": "send_conversation_message", + }, + request_config=LettaRequestConfig.from_letta_request(request), + ), + actor=actor, + ) + + # Set run_id in Redis for cancellation support + redis_client = await get_redis_client() + await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{conversation.agent_id}", run.id if run else None) + + agent_loop = AgentLoop.load(agent_state=agent, actor=actor) + return await agent_loop.step( + request.messages, + max_steps=request.max_steps, + run_id=run.id if run else None, + use_assistant_message=request.use_assistant_message, + include_return_message_types=request.include_return_message_types, + client_tools=request.client_tools, + conversation_id=conversation_id, + ) @router.post( @@ -289,11 +356,14 @@ async def retrieve_conversation_stream( ) if settings.enable_cancellation_aware_streaming: + from letta.server.rest_api.streaming_response import cancellation_aware_stream_wrapper, get_cancellation_event_for_run + stream = cancellation_aware_stream_wrapper( stream_generator=stream, run_manager=server.run_manager, run_id=run.id, actor=actor, + cancellation_event=get_cancellation_event_for_run(run.id), ) if request and request.include_pings and settings.enable_keepalive: diff --git a/letta/server/rest_api/routers/v1/folders.py b/letta/server/rest_api/routers/v1/folders.py index d925454c..908004ac 100644 --- a/letta/server/rest_api/routers/v1/folders.py +++ b/letta/server/rest_api/routers/v1/folders.py @@ -594,7 +594,7 @@ async def load_file_to_source_async(server: SyncServer, source_id: str, job_id: 
async def sleeptime_document_ingest_async(server: SyncServer, source_id: str, actor: User, clear_history: bool = False): - source = await server.source_manager.get_source_by_id(source_id=source_id) + source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor) agents = await server.source_manager.list_attached_agents(source_id=source_id, actor=actor) for agent in agents: if agent.enable_sleeptime: diff --git a/letta/server/rest_api/routers/v1/messages.py b/letta/server/rest_api/routers/v1/messages.py index dc6b0f9e..e695d292 100644 --- a/letta/server/rest_api/routers/v1/messages.py +++ b/letta/server/rest_api/routers/v1/messages.py @@ -231,7 +231,7 @@ async def list_messages_for_batch( # Get messages directly using our efficient method messages = await server.batch_manager.get_messages_for_letta_batch_async( - letta_batch_job_id=batch_id, limit=limit, actor=actor, agent_id=agent_id, ascending=(order == "asc"), before=before, after=after + letta_batch_job_id=batch_id, actor=actor, limit=limit, agent_id=agent_id, sort_descending=(order == "desc"), cursor=after ) return LettaBatchMessages(messages=messages) diff --git a/letta/server/rest_api/routers/v1/passages.py b/letta/server/rest_api/routers/v1/passages.py index cdb1010c..7e7f59c3 100644 --- a/letta/server/rest_api/routers/v1/passages.py +++ b/letta/server/rest_api/routers/v1/passages.py @@ -4,18 +4,62 @@ from typing import List, Literal, Optional from fastapi import APIRouter, Body, Depends from pydantic import BaseModel, Field +from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import TagMatchMode from letta.schemas.passage import Passage +from letta.schemas.user import User as PydanticUser from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server from letta.server.server import SyncServer router = APIRouter(prefix="/passages", tags=["passages"]) +async def _get_embedding_config_for_search( + server: SyncServer, + actor: 
PydanticUser, + agent_id: Optional[str], + archive_id: Optional[str], +) -> Optional[EmbeddingConfig]: + """Determine which embedding config to use for a passage search. + + Args: + server: The SyncServer instance + actor: The user making the request + agent_id: Optional agent ID to get embedding config from + archive_id: Optional archive ID to get embedding config from + + Returns: + The embedding config to use, or None if not found + + Priority: + 1. If agent_id is provided, use that agent's embedding config + 2. If archive_id is provided, use that archive's embedding config + 3. Otherwise, try to get embedding config from any existing agent + 4. Fall back to server default if no agents exist + """ + if agent_id: + agent_state = await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor) + return agent_state.embedding_config + + if archive_id: + archive = await server.archive_manager.get_archive_by_id_async(archive_id=archive_id, actor=actor) + return archive.embedding_config + + # Search across all passages - try to get embedding config from any agent + agent_count = await server.agent_manager.size_async(actor=actor) + if agent_count > 0: + agents = await server.agent_manager.list_agents_async(actor=actor, limit=1) + if agents: + return agents[0].embedding_config + + # Fall back to server default + return server.default_embedding_config + + class PassageSearchRequest(BaseModel): """Request model for searching passages across archives.""" - query: str = Field(..., description="Text query for semantic search") + query: Optional[str] = Field(None, description="Text query for semantic search") agent_id: Optional[str] = Field(None, description="Filter passages by agent ID") archive_id: Optional[str] = Field(None, description="Filter passages by archive ID") tags: Optional[List[str]] = Field(None, description="Optional list of tags to filter search results") @@ -56,29 +100,16 @@ async def search_passages( # Convert tag_match_mode to enum tag_mode 
= TagMatchMode.ANY if request.tag_match_mode == "any" else TagMatchMode.ALL - # Determine which embedding config to use + # Determine embedding config (only needed when query text is provided) + embed_query = bool(request.query) embedding_config = None - if request.agent_id: - # Search by agent - agent_state = await server.agent_manager.get_agent_by_id_async(agent_id=request.agent_id, actor=actor) - embedding_config = agent_state.embedding_config - elif request.archive_id: - # Search by archive_id - archive = await server.archive_manager.get_archive_by_id_async(archive_id=request.archive_id, actor=actor) - embedding_config = archive.embedding_config - else: - # Search across all passages in the organization - # Get default embedding config from any agent or use server default - agent_count = await server.agent_manager.size_async(actor=actor) - if agent_count > 0: - # Get first agent to derive embedding config - agents = await server.agent_manager.list_agents_async(actor=actor, limit=1) - if agents: - embedding_config = agents[0].embedding_config - - if not embedding_config: - # Fall back to server default - embedding_config = server.default_embedding_config + if embed_query: + embedding_config = await _get_embedding_config_for_search( + server=server, + actor=actor, + agent_id=request.agent_id, + archive_id=request.archive_id, + ) # Search passages passages_with_metadata = await server.agent_manager.query_agent_passages_async( @@ -88,7 +119,7 @@ async def search_passages( query_text=request.query, limit=request.limit, embedding_config=embedding_config, - embed_query=True, + embed_query=embed_query, tags=request.tags, tag_match_mode=tag_mode, start_date=request.start_date, diff --git a/letta/server/rest_api/routers/v1/providers.py b/letta/server/rest_api/routers/v1/providers.py index 5d5135f9..5c0ae926 100644 --- a/letta/server/rest_api/routers/v1/providers.py +++ b/letta/server/rest_api/routers/v1/providers.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, List, 
Literal, Optional -from fastapi import APIRouter, Body, Depends, Query, status +from fastapi import APIRouter, Body, Depends, HTTPException, Query, status from fastapi.responses import JSONResponse from letta.schemas.enums import ProviderCategory, ProviderType @@ -144,6 +144,27 @@ async def check_existing_provider( ) +@router.patch("/{provider_id}/refresh", response_model=Provider, operation_id="refresh_provider_models") +async def refresh_provider_models( + provider_id: ProviderId, + headers: HeaderParams = Depends(get_headers), + server: "SyncServer" = Depends(get_letta_server), +): + """ + Refresh models for a BYOK provider by querying the provider's API. + Adds new models and removes ones no longer available. + """ + actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id) + provider = await server.provider_manager.get_provider_async(provider_id=provider_id, actor=actor) + + # Only allow refresh for BYOK providers + if provider.provider_category != ProviderCategory.byok: + raise HTTPException(status_code=400, detail="Refresh is only supported for BYOK providers") + + await server.provider_manager._sync_default_models_for_provider(provider, actor) + return await server.provider_manager.get_provider_async(provider_id=provider_id, actor=actor) + + @router.delete("/{provider_id}", response_model=None, operation_id="delete_provider") async def delete_provider( provider_id: ProviderId, diff --git a/letta/server/rest_api/routers/v1/runs.py b/letta/server/rest_api/routers/v1/runs.py index 30316d46..b4c3973d 100644 --- a/letta/server/rest_api/routers/v1/runs.py +++ b/letta/server/rest_api/routers/v1/runs.py @@ -393,11 +393,14 @@ async def retrieve_stream_for_run( ) if settings.enable_cancellation_aware_streaming: + from letta.server.rest_api.streaming_response import cancellation_aware_stream_wrapper, get_cancellation_event_for_run + stream = cancellation_aware_stream_wrapper( stream_generator=stream, run_manager=server.run_manager, 
run_id=run_id, actor=actor, + cancellation_event=get_cancellation_event_for_run(run_id), ) if request.include_pings and settings.enable_keepalive: diff --git a/letta/server/rest_api/routers/v1/sources.py b/letta/server/rest_api/routers/v1/sources.py index aad28074..d5a38a9c 100644 --- a/letta/server/rest_api/routers/v1/sources.py +++ b/letta/server/rest_api/routers/v1/sources.py @@ -485,7 +485,7 @@ async def load_file_to_source_async(server: SyncServer, source_id: str, job_id: async def sleeptime_document_ingest_async(server: SyncServer, source_id: str, actor: User, clear_history: bool = False): - source = await server.source_manager.get_source_by_id(source_id=source_id) + source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor) agents = await server.source_manager.list_attached_agents(source_id=source_id, actor=actor) for agent in agents: if agent.enable_sleeptime: diff --git a/letta/server/rest_api/routers/v1/users.py b/letta/server/rest_api/routers/v1/users.py index f8e5acd0..35d8a821 100644 --- a/letta/server/rest_api/routers/v1/users.py +++ b/letta/server/rest_api/routers/v1/users.py @@ -4,6 +4,7 @@ from fastapi import APIRouter, Body, Depends, Query from letta.schemas.user import User, UserCreate, UserUpdate from letta.server.rest_api.dependencies import get_letta_server +from letta.validators import UserIdQueryRequired if TYPE_CHECKING: from letta.schemas.user import User @@ -52,7 +53,7 @@ async def update_user( @router.delete("/", tags=["admin"], response_model=User, operation_id="delete_user") async def delete_user( - user_id: str = Query(..., description="The user_id key to be deleted."), + user_id: UserIdQueryRequired, server: "SyncServer" = Depends(get_letta_server), ): # TODO make a soft deletion, instead of a hard deletion diff --git a/letta/server/rest_api/streaming_response.py b/letta/server/rest_api/streaming_response.py index 9869ff5c..02d727ff 100644 --- a/letta/server/rest_api/streaming_response.py +++ 
b/letta/server/rest_api/streaming_response.py @@ -7,6 +7,7 @@ import json import re from collections.abc import AsyncIterator from datetime import datetime, timezone +from typing import Dict, Optional from uuid import uuid4 import anyio @@ -26,6 +27,17 @@ from letta.utils import safe_create_task logger = get_logger(__name__) +# Global registry of cancellation events per run_id +# Note: Events are small and we don't bother cleaning them up +_cancellation_events: Dict[str, asyncio.Event] = {} + + +def get_cancellation_event_for_run(run_id: str) -> asyncio.Event: + """Get or create a cancellation event for a run.""" + if run_id not in _cancellation_events: + _cancellation_events[run_id] = asyncio.Event() + return _cancellation_events[run_id] + class RunCancelledException(Exception): """Exception raised when a run is explicitly cancelled (not due to client timeout)""" @@ -125,6 +137,7 @@ async def cancellation_aware_stream_wrapper( run_id: str, actor: User, cancellation_check_interval: float = 0.5, + cancellation_event: Optional[asyncio.Event] = None, ) -> AsyncIterator[str | bytes]: """ Wraps a stream generator to provide real-time run cancellation checking. 
@@ -156,11 +169,22 @@ async def cancellation_aware_stream_wrapper( run = await run_manager.get_run_by_id(run_id=run_id, actor=actor) if run.status == RunStatus.cancelled: logger.info(f"Stream cancelled for run {run_id}, interrupting stream") + + # Signal cancellation via shared event if available + if cancellation_event: + cancellation_event.set() + logger.info(f"Set cancellation event for run {run_id}") + # Send cancellation event to client - cancellation_event = {"message_type": "stop_reason", "stop_reason": "cancelled"} - yield f"data: {json.dumps(cancellation_event)}\n\n" - # Raise custom exception for explicit run cancellation - raise RunCancelledException(run_id, f"Run {run_id} was cancelled") + stop_event = {"message_type": "stop_reason", "stop_reason": "cancelled"} + yield f"data: {json.dumps(stop_event)}\n\n" + + # Inject exception INTO the generator so its except blocks can catch it + try: + await stream_generator.athrow(RunCancelledException(run_id, f"Run {run_id} was cancelled")) + except (StopAsyncIteration, RunCancelledException): + # Generator closed gracefully or raised the exception back + break except RunCancelledException: # Re-raise cancellation immediately, don't catch it raise @@ -173,9 +197,10 @@ async def cancellation_aware_stream_wrapper( yield chunk except RunCancelledException: - # Re-raise RunCancelledException to distinguish from client timeout + # Don't re-raise - we already injected the exception into the generator + # The generator has handled it and set its stream_was_cancelled flag logger.info(f"Stream for run {run_id} was explicitly cancelled and cleaned up") - raise + # Don't raise - let it exit gracefully except asyncio.CancelledError: # Re-raise CancelledError (likely client timeout) to ensure proper cleanup logger.info(f"Stream for run {run_id} was cancelled (likely client timeout) and cleaned up") diff --git a/letta/server/rest_api/utils.py b/letta/server/rest_api/utils.py index 25186f8e..66e15572 100644 --- 
a/letta/server/rest_api/utils.py +++ b/letta/server/rest_api/utils.py @@ -20,7 +20,7 @@ from letta.constants import ( ) from letta.errors import ContextWindowExceededError, RateLimitExceededError from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms -from letta.helpers.message_helper import convert_message_creates_to_messages +from letta.helpers.message_helper import convert_message_creates_to_messages, resolve_tool_return_images from letta.log import get_logger from letta.otel.context import get_ctx_attributes from letta.otel.metric_registry import MetricRegistry @@ -171,18 +171,26 @@ async def create_input_messages( return messages -def create_approval_response_message_from_input( +async def create_approval_response_message_from_input( agent_state: AgentState, input_message: ApprovalCreate, run_id: Optional[str] = None ) -> List[Message]: - def maybe_convert_tool_return_message(maybe_tool_return: LettaToolReturn): + async def maybe_convert_tool_return_message(maybe_tool_return: LettaToolReturn): if isinstance(maybe_tool_return, LettaToolReturn): - packaged_function_response = package_function_response( - maybe_tool_return.status == "success", maybe_tool_return.tool_return, agent_state.timezone - ) + tool_return_content = maybe_tool_return.tool_return + + # Handle tool_return content - can be string or list of content parts (text/image) + if isinstance(tool_return_content, str): + # String content - wrap with package_function_response as before + func_response = package_function_response(maybe_tool_return.status == "success", tool_return_content, agent_state.timezone) + else: + # List of content parts (text/image) - resolve URL images to base64 first + resolved_content = await resolve_tool_return_images(tool_return_content) + func_response = resolved_content + return ToolReturn( tool_call_id=maybe_tool_return.tool_call_id, status=maybe_tool_return.status, - func_response=packaged_function_response, + func_response=func_response, 
stdout=maybe_tool_return.stdout, stderr=maybe_tool_return.stderr, ) @@ -196,6 +204,11 @@ def create_approval_response_message_from_input( getattr(input_message, "approval_request_id", None), ) + # Process all tool returns concurrently (for async image resolution) + import asyncio + + converted_approvals = await asyncio.gather(*[maybe_convert_tool_return_message(approval) for approval in approvals_list]) + return [ Message( role=MessageRole.approval, @@ -204,7 +217,7 @@ def create_approval_response_message_from_input( approval_request_id=input_message.approval_request_id, approve=input_message.approve, denial_reason=input_message.reason, - approvals=[maybe_convert_tool_return_message(approval) for approval in approvals_list], + approvals=list(converted_approvals), run_id=run_id, group_id=input_message.group_id if input_message.group_id diff --git a/letta/server/server.py b/letta/server/server.py index a021bdfc..2197c38a 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -19,7 +19,6 @@ from letta.config import LettaConfig from letta.constants import LETTA_TOOL_EXECUTION_DIR from letta.data_sources.connectors import DataConnector, load_data from letta.errors import ( - EmbeddingConfigRequiredError, HandleNotFoundError, LettaInvalidArgumentError, LettaMCPConnectionError, @@ -68,10 +67,12 @@ from letta.schemas.providers import ( GroqProvider, LettaProvider, LMStudioOpenAIProvider, + MiniMaxProvider, OllamaProvider, OpenAIProvider, OpenRouterProvider, Provider, + SGLangProvider, TogetherProvider, VLLMProvider, XAIProvider, @@ -283,15 +284,33 @@ class SyncServer(object): # NOTE: to use the /chat/completions endpoint, you need to specify extra flags on vLLM startup # see: https://docs.vllm.ai/en/stable/features/tool_calling.html # e.g. "... 
--enable-auto-tool-choice --tool-call-parser hermes" + # Auto-append /v1 to the base URL + vllm_url = ( + model_settings.vllm_api_base if model_settings.vllm_api_base.endswith("/v1") else model_settings.vllm_api_base + "/v1" + ) self._enabled_providers.append( VLLMProvider( name="vllm", - base_url=model_settings.vllm_api_base, + base_url=vllm_url, default_prompt_formatter=model_settings.default_prompt_formatter, handle_base=model_settings.vllm_handle_base, ) ) + if model_settings.sglang_api_base: + # Auto-append /v1 to the base URL + sglang_url = ( + model_settings.sglang_api_base if model_settings.sglang_api_base.endswith("/v1") else model_settings.sglang_api_base + "/v1" + ) + self._enabled_providers.append( + SGLangProvider( + name="sglang", + base_url=sglang_url, + default_prompt_formatter=model_settings.default_prompt_formatter, + handle_base=model_settings.sglang_handle_base, + ) + ) + if model_settings.aws_access_key_id and model_settings.aws_secret_access_key and model_settings.aws_default_region: self._enabled_providers.append( BedrockProvider( @@ -324,6 +343,13 @@ class SyncServer(object): api_key_enc=Secret.from_plaintext(model_settings.xai_api_key), ) ) + if model_settings.minimax_api_key: + self._enabled_providers.append( + MiniMaxProvider( + name="minimax", + api_key_enc=Secret.from_plaintext(model_settings.minimax_api_key), + ) + ) if model_settings.zai_api_key: self._enabled_providers.append( ZAIProvider( @@ -443,6 +469,8 @@ class SyncServer(object): embedding_models=embedding_models, organization_id=None, # Global models ) + # Update last_synced timestamp + await self.provider_manager.update_provider_last_synced_async(persisted_provider.id) logger.info( f"Synced {len(llm_models)} LLM models and {len(embedding_models)} embedding models for provider {persisted_provider.name}" ) @@ -628,9 +656,10 @@ class SyncServer(object): actor=actor, ) - async def create_sleeptime_agent_async(self, main_agent: AgentState, actor: User) -> AgentState: + async def 
create_sleeptime_agent_async(self, main_agent: AgentState, actor: User) -> Optional[AgentState]: if main_agent.embedding_config is None: - raise EmbeddingConfigRequiredError(agent_id=main_agent.id, operation="create_sleeptime_agent") + logger.warning(f"Skipping sleeptime agent creation for agent {main_agent.id}: no embedding config provided") + return None request = CreateAgent( name=main_agent.name + "-sleeptime", agent_type=AgentType.sleeptime_agent, @@ -662,9 +691,10 @@ class SyncServer(object): ) return await self.agent_manager.get_agent_by_id_async(agent_id=main_agent.id, actor=actor) - async def create_voice_sleeptime_agent_async(self, main_agent: AgentState, actor: User) -> AgentState: + async def create_voice_sleeptime_agent_async(self, main_agent: AgentState, actor: User) -> Optional[AgentState]: if main_agent.embedding_config is None: - raise EmbeddingConfigRequiredError(agent_id=main_agent.id, operation="create_voice_sleeptime_agent") + logger.warning(f"Skipping voice sleeptime agent creation for agent {main_agent.id}: no embedding config provided") + return None # TODO: Inject system request = CreateAgent( name=main_agent.name + "-sleeptime", @@ -956,7 +986,7 @@ class SyncServer(object): from letta.data_sources.connectors import DirectoryConnector # TODO: move this into a thread - source = await self.source_manager.get_source_by_id(source_id=source_id) + source = await self.source_manager.get_source_by_id(source_id=source_id, actor=actor) connector = DirectoryConnector(input_files=[file_path]) num_passages, num_documents = await self.load_data(user_id=source.created_by_id, source_name=source.name, connector=connector) @@ -1041,9 +1071,10 @@ class SyncServer(object): async def create_document_sleeptime_agent_async( self, main_agent: AgentState, source: Source, actor: User, clear_history: bool = False - ) -> AgentState: + ) -> Optional[AgentState]: if main_agent.embedding_config is None: - raise EmbeddingConfigRequiredError(agent_id=main_agent.id, 
operation="create_document_sleeptime_agent") + logger.warning(f"Skipping document sleeptime agent creation for agent {main_agent.id}: no embedding config provided") + return None try: block = await self.agent_manager.get_block_with_label_async(agent_id=main_agent.id, block_label=source.name, actor=actor) except: @@ -1151,10 +1182,18 @@ class SyncServer(object): if provider_type and provider.provider_type != provider_type: continue + # For bedrock, use schema default for base_url since DB may have NULL + # TODO: can maybe do this for all models but want to isolate change so we don't break any other providers + if provider.provider_type == ProviderType.bedrock: + typed_provider = provider.cast_to_subtype() + model_endpoint = typed_provider.base_url + else: + model_endpoint = provider.base_url + llm_config = LLMConfig( model=model.name, model_endpoint_type=model.model_endpoint_type, - model_endpoint=provider.base_url or model.model_endpoint_type, + model_endpoint=model_endpoint, context_window=model.max_context_window or 16384, handle=model.handle, provider_name=provider.name, @@ -1162,7 +1201,7 @@ class SyncServer(object): ) llm_models.append(llm_config) - # Get BYOK provider models by hitting provider endpoints directly + # Get BYOK provider models - sync if not synced yet, then read from DB if include_byok: byok_providers = await self.provider_manager.list_providers_async( actor=actor, @@ -1173,9 +1212,39 @@ class SyncServer(object): for provider in byok_providers: try: + # Get typed provider to access schema defaults (e.g., base_url) typed_provider = provider.cast_to_subtype() - models = await typed_provider.list_llm_models_async() - llm_models.extend(models) + + # Sync models if not synced yet + if provider.last_synced is None: + models = await typed_provider.list_llm_models_async() + embedding_models = await typed_provider.list_embedding_models_async() + await self.provider_manager.sync_provider_models_async( + provider=provider, + llm_models=models, + 
embedding_models=embedding_models, + organization_id=provider.organization_id, + ) + await self.provider_manager.update_provider_last_synced_async(provider.id, actor=actor) + + # Read from database + provider_llm_models = await self.provider_manager.list_models_async( + actor=actor, + model_type="llm", + provider_id=provider.id, + enabled=True, + ) + for model in provider_llm_models: + llm_config = LLMConfig( + model=model.name, + model_endpoint_type=model.model_endpoint_type, + model_endpoint=typed_provider.base_url, + context_window=model.max_context_window or constants.DEFAULT_CONTEXT_WINDOW, + handle=model.handle, + provider_name=provider.name, + provider_category=ProviderCategory.byok, + ) + llm_models.append(llm_config) except Exception as e: logger.warning(f"Failed to fetch models from BYOK provider {provider.name}: {e}") @@ -1217,7 +1286,7 @@ class SyncServer(object): ) embedding_models.append(embedding_config) - # Get BYOK provider models by hitting provider endpoints directly + # Get BYOK provider models - sync if not synced yet, then read from DB byok_providers = await self.provider_manager.list_providers_async( actor=actor, provider_category=[ProviderCategory.byok], @@ -1225,9 +1294,38 @@ class SyncServer(object): for provider in byok_providers: try: + # Get typed provider to access schema defaults (e.g., base_url) typed_provider = provider.cast_to_subtype() - models = await typed_provider.list_embedding_models_async() - embedding_models.extend(models) + + # Sync models if not synced yet + if provider.last_synced is None: + llm_models = await typed_provider.list_llm_models_async() + emb_models = await typed_provider.list_embedding_models_async() + await self.provider_manager.sync_provider_models_async( + provider=provider, + llm_models=llm_models, + embedding_models=emb_models, + organization_id=provider.organization_id, + ) + await self.provider_manager.update_provider_last_synced_async(provider.id, actor=actor) + + # Read from database + 
provider_embedding_models = await self.provider_manager.list_models_async( + actor=actor, + model_type="embedding", + provider_id=provider.id, + enabled=True, + ) + for model in provider_embedding_models: + embedding_config = EmbeddingConfig( + embedding_model=model.name, + embedding_endpoint_type=model.model_endpoint_type, + embedding_endpoint=typed_provider.base_url, + embedding_dim=model.embedding_dim or 1536, + embedding_chunk_size=constants.DEFAULT_EMBEDDING_CHUNK_SIZE, + handle=model.handle, + ) + embedding_models.append(embedding_config) except Exception as e: logger.warning(f"Failed to fetch embedding models from BYOK provider {provider.name}: {e}") diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index 20341478..56b6a62f 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -357,7 +357,11 @@ class AgentManager: ) agent_create.llm_config = LLMConfig.apply_reasoning_setting_to_config( agent_create.llm_config, - agent_create.reasoning if agent_create.reasoning is not None else default_reasoning, + agent_create.reasoning + if agent_create.reasoning is not None + else ( + agent_create.llm_config.enable_reasoner if agent_create.llm_config.enable_reasoner is not None else default_reasoning + ), agent_create.agent_type, ) else: @@ -2042,10 +2046,12 @@ class AgentManager: if other_agent_id != agent_id: try: other_agent = await AgentModel.read_async(db_session=session, identifier=other_agent_id, actor=actor) - if other_agent.agent_type == AgentType.sleeptime_agent and block not in other_agent.core_memory: - other_agent.core_memory.append(block) - # await other_agent.update_async(session, actor=actor, no_commit=True) - await other_agent.update_async(session, actor=actor) + if other_agent.agent_type == AgentType.sleeptime_agent: + # Check if block with same label already exists + existing_block = next((b for b in other_agent.core_memory if b.label == block.label), None) + if not existing_block: + 
other_agent.core_memory.append(block) + await other_agent.update_async(session, actor=actor) except NoResultFound: # Agent might not exist anymore, skip continue @@ -2321,15 +2327,6 @@ class AgentManager: # Use Turbopuffer for vector search if archive is configured for TPUF if archive.vector_db_provider == VectorDBProvider.TPUF: from letta.helpers.tpuf_client import TurbopufferClient - from letta.llm_api.llm_client import LLMClient - - # Generate embedding for query - embedding_client = LLMClient.create( - provider_type=embedding_config.embedding_endpoint_type, - actor=actor, - ) - embeddings = await embedding_client.request_embeddings([query_text], embedding_config) - query_embedding = embeddings[0] # Query Turbopuffer - use hybrid search when text is available tpuf_client = TurbopufferClient() @@ -2488,13 +2485,15 @@ class AgentManager: # Get results using existing passage query method limit = top_k if top_k is not None else RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE + # Only use embedding-based search if embedding config is available + use_embedding_search = agent_state.embedding_config is not None passages_with_metadata = await self.query_agent_passages_async( actor=actor, agent_id=agent_id, query_text=query, limit=limit, embedding_config=agent_state.embedding_config, - embed_query=True, + embed_query=use_embedding_search, tags=tags, tag_match_mode=tag_mode, start_date=start_date, @@ -3053,10 +3052,19 @@ class AgentManager: ) # Apply cursor-based pagination - if before: - query = query.where(BlockModel.id < before) - if after: - query = query.where(BlockModel.id > after) + # Note: cursor direction must account for sort order + # - ascending order: "after X" means id > X, "before X" means id < X + # - descending order: "after X" means id < X, "before X" means id > X + if ascending: + if before: + query = query.where(BlockModel.id < before) + if after: + query = query.where(BlockModel.id > after) + else: + if before: + query = query.where(BlockModel.id > before) + if 
after: + query = query.where(BlockModel.id < after) # Apply sorting - use id instead of created_at for core memory blocks if ascending: diff --git a/letta/services/agent_serialization_manager.py b/letta/services/agent_serialization_manager.py index eb58d022..1947a8ee 100644 --- a/letta/services/agent_serialization_manager.py +++ b/letta/services/agent_serialization_manager.py @@ -33,6 +33,7 @@ from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import FileProcessingStatus, VectorDBProvider from letta.schemas.file import FileMetadata from letta.schemas.group import Group, GroupCreate +from letta.schemas.llm_config import LLMConfig from letta.schemas.mcp import MCPServer from letta.schemas.message import Message from letta.schemas.source import Source @@ -472,6 +473,7 @@ class AgentSerializationManager: dry_run: bool = False, env_vars: Optional[Dict[str, Any]] = None, override_embedding_config: Optional[EmbeddingConfig] = None, + override_llm_config: Optional[LLMConfig] = None, project_id: Optional[str] = None, ) -> ImportResult: """ @@ -672,6 +674,11 @@ class AgentSerializationManager: agent_schema.embedding_config = override_embedding_config agent_schema.embedding = override_embedding_config.handle + # Override llm_config if provided (keeps other defaults like context size) + if override_llm_config: + agent_schema.llm_config = override_llm_config + agent_schema.model = override_llm_config.handle + # Convert AgentSchema back to CreateAgent, remapping tool/block IDs agent_data = agent_schema.model_dump(exclude={"id", "in_context_message_ids", "messages"}) diff --git a/letta/services/archive_manager.py b/letta/services/archive_manager.py index b982ca4d..28c3322a 100644 --- a/letta/services/archive_manager.py +++ b/letta/services/archive_manager.py @@ -4,7 +4,6 @@ from typing import Dict, List, Optional from sqlalchemy import delete, or_, select -from letta.errors import EmbeddingConfigRequiredError from letta.helpers.tpuf_client import 
should_use_tpuf from letta.log import get_logger from letta.orm import ArchivalPassage, Archive as ArchiveModel, ArchivesAgents @@ -32,7 +31,7 @@ class ArchiveManager: async def create_archive_async( self, name: str, - embedding_config: EmbeddingConfig, + embedding_config: Optional[EmbeddingConfig] = None, description: Optional[str] = None, actor: PydanticUser = None, ) -> PydanticArchive: @@ -289,6 +288,7 @@ class ArchiveManager: text: str, metadata: Optional[Dict] = None, tags: Optional[List[str]] = None, + created_at: Optional[str] = None, actor: PydanticUser = None, ) -> PydanticPassage: """Create a passage in an archive. @@ -298,6 +298,7 @@ class ArchiveManager: text: The text content of the passage metadata: Optional metadata for the passage tags: Optional tags for categorizing the passage + created_at: Optional creation datetime in ISO 8601 format actor: User performing the operation Returns: @@ -312,13 +313,20 @@ class ArchiveManager: # Verify the archive exists and user has access archive = await self.get_archive_by_id_async(archive_id=archive_id, actor=actor) - # Generate embeddings for the text - embedding_client = LLMClient.create( - provider_type=archive.embedding_config.embedding_endpoint_type, - actor=actor, - ) - embeddings = await embedding_client.request_embeddings([text], archive.embedding_config) - embedding = embeddings[0] if embeddings else None + # Generate embeddings for the text if embedding config is available + embedding = None + if archive.embedding_config is not None: + embedding_client = LLMClient.create( + provider_type=archive.embedding_config.embedding_endpoint_type, + actor=actor, + ) + embeddings = await embedding_client.request_embeddings([text], archive.embedding_config) + embedding = embeddings[0] if embeddings else None + + # Parse created_at from ISO string if provided + parsed_created_at = None + if created_at: + parsed_created_at = datetime.fromisoformat(created_at) # Create the passage object with embedding passage = 
PydanticPassage( @@ -329,6 +337,7 @@ class ArchiveManager: tags=tags, embedding_config=archive.embedding_config, embedding=embedding, + created_at=parsed_created_at, ) # Use PassageManager to create the passage @@ -345,13 +354,14 @@ class ArchiveManager: tpuf_client = TurbopufferClient() - # Insert to Turbopuffer with the same ID as SQL + # Insert to Turbopuffer with the same ID as SQL, reusing existing embedding await tpuf_client.insert_archival_memories( archive_id=archive.id, text_chunks=[created_passage.text], passage_ids=[created_passage.id], organization_id=actor.organization_id, actor=actor, + embeddings=[created_passage.embedding], ) logger.info(f"Uploaded passage {created_passage.id} to Turbopuffer for archive {archive_id}") except Exception as e: @@ -362,6 +372,92 @@ class ArchiveManager: logger.info(f"Created passage {created_passage.id} in archive {archive_id}") return created_passage + @enforce_types + @raise_on_invalid_id(param_name="archive_id", expected_prefix=PrimitiveType.ARCHIVE) + @trace_method + async def create_passages_in_archive_async( + self, + archive_id: str, + passages: List[Dict], + actor: PydanticUser = None, + ) -> List[PydanticPassage]: + """Create multiple passages in an archive. 
+ + Args: + archive_id: ID of the archive to add the passages to + passages: Passage create payloads + actor: User performing the operation + + Returns: + The created passages + + Raises: + NoResultFound: If archive not found + """ + if not passages: + return [] + + from letta.llm_api.llm_client import LLMClient + from letta.services.passage_manager import PassageManager + + archive = await self.get_archive_by_id_async(archive_id=archive_id, actor=actor) + + texts = [passage["text"] for passage in passages] + embedding_client = LLMClient.create( + provider_type=archive.embedding_config.embedding_endpoint_type, + actor=actor, + ) + embeddings = await embedding_client.request_embeddings(texts, archive.embedding_config) + + if len(embeddings) != len(passages): + raise ValueError("Embedding response count does not match passages count") + + # Build PydanticPassage objects for batch creation + pydantic_passages: List[PydanticPassage] = [] + for passage_payload, embedding in zip(passages, embeddings): + # Parse created_at from ISO string if provided + created_at = passage_payload.get("created_at") + if created_at and isinstance(created_at, str): + created_at = datetime.fromisoformat(created_at) + + passage = PydanticPassage( + text=passage_payload["text"], + archive_id=archive_id, + organization_id=actor.organization_id, + metadata=passage_payload.get("metadata") or {}, + tags=passage_payload.get("tags"), + embedding_config=archive.embedding_config, + embedding=embedding, + created_at=created_at, + ) + pydantic_passages.append(passage) + + # Use batch create for efficient single-transaction insert + passage_manager = PassageManager() + created_passages = await passage_manager.create_agent_passages_async( + pydantic_passages=pydantic_passages, + actor=actor, + ) + + if archive.vector_db_provider == VectorDBProvider.TPUF: + try: + from letta.helpers.tpuf_client import TurbopufferClient + + tpuf_client = TurbopufferClient() + await tpuf_client.insert_archival_memories( + 
archive_id=archive.id, + text_chunks=[passage.text for passage in created_passages], + passage_ids=[passage.id for passage in created_passages], + organization_id=actor.organization_id, + actor=actor, + ) + logger.info(f"Uploaded {len(created_passages)} passages to Turbopuffer for archive {archive_id}") + except Exception as e: + logger.error(f"Failed to upload passages to Turbopuffer: {e}") + + logger.info(f"Created {len(created_passages)} passages in archive {archive_id}") + return created_passages + @enforce_types @raise_on_invalid_id(param_name="archive_id", expected_prefix=PrimitiveType.ARCHIVE) @raise_on_invalid_id(param_name="passage_id", expected_prefix=PrimitiveType.PASSAGE) @@ -433,9 +529,7 @@ class ArchiveManager: ) return archive - # Create a default archive for this agent - if agent_state.embedding_config is None: - raise EmbeddingConfigRequiredError(agent_id=agent_state.id, operation="create_default_archive") + # Create a default archive for this agent (embedding_config is optional) archive_name = f"{agent_state.name}'s Archive" archive = await self.create_archive_async( name=archive_name, diff --git a/letta/services/block_manager.py b/letta/services/block_manager.py index cdd219b0..848c4868 100644 --- a/letta/services/block_manager.py +++ b/letta/services/block_manager.py @@ -508,7 +508,7 @@ class BlockManager: @enforce_types @raise_on_invalid_id(param_name="block_id", expected_prefix=PrimitiveType.BLOCK) @trace_method - async def get_block_by_id_async(self, block_id: str, actor: Optional[PydanticUser] = None) -> Optional[PydanticBlock]: + async def get_block_by_id_async(self, block_id: str, actor: PydanticUser) -> Optional[PydanticBlock]: """Retrieve a block by its ID, including tags.""" async with db_registry.async_session() as session: try: @@ -523,7 +523,7 @@ class BlockManager: @enforce_types @trace_method - async def get_all_blocks_by_ids_async(self, block_ids: List[str], actor: Optional[PydanticUser] = None) -> List[PydanticBlock]: + async def 
get_all_blocks_by_ids_async(self, block_ids: List[str], actor: PydanticUser) -> List[PydanticBlock]: """Retrieve blocks by their ids without loading unnecessary relationships. Async implementation.""" if not block_ids: return [] @@ -540,9 +540,8 @@ class BlockManager: noload(BlockModel.agents), noload(BlockModel.identities), noload(BlockModel.groups), noload(BlockModel.tags) ) - # Apply access control if actor is provided - if actor: - query = BlockModel.apply_access_predicate(query, actor, ["read"], AccessType.ORGANIZATION) + # Apply access control - actor is required for org-scoping + query = BlockModel.apply_access_predicate(query, actor, ["read"], AccessType.ORGANIZATION) # TODO: Add soft delete filter if applicable # if hasattr(BlockModel, "is_deleted"): diff --git a/letta/services/conversation_manager.py b/letta/services/conversation_manager.py index e5c87a1a..101598fb 100644 --- a/letta/services/conversation_manager.py +++ b/letta/services/conversation_manager.py @@ -105,9 +105,53 @@ class ConversationManager: actor: PydanticUser, limit: int = 50, after: Optional[str] = None, + summary_search: Optional[str] = None, ) -> List[PydanticConversation]: - """List conversations for an agent with cursor-based pagination.""" + """List conversations for an agent with cursor-based pagination. 
+ + Args: + agent_id: The agent ID to list conversations for + actor: The user performing the action + limit: Maximum number of conversations to return + after: Cursor for pagination (conversation ID) + summary_search: Optional text to search for within the summary field + + Returns: + List of conversations matching the criteria + """ async with db_registry.async_session() as session: + # If summary search is provided, use custom query + if summary_search: + from sqlalchemy import and_ + + stmt = ( + select(ConversationModel) + .where( + and_( + ConversationModel.agent_id == agent_id, + ConversationModel.organization_id == actor.organization_id, + ConversationModel.summary.isnot(None), + ConversationModel.summary.contains(summary_search), + ) + ) + .order_by(ConversationModel.created_at.desc()) + .limit(limit) + ) + + if after: + # Add cursor filtering + after_conv = await ConversationModel.read_async( + db_session=session, + identifier=after, + actor=actor, + ) + stmt = stmt.where(ConversationModel.created_at < after_conv.created_at) + + result = await session.execute(stmt) + conversations = result.scalars().all() + return [conv.to_pydantic() for conv in conversations] + + # Use default list logic conversations = await ConversationModel.list_async( db_session=session, actor=actor, diff --git a/letta/services/file_manager.py b/letta/services/file_manager.py index bd52e309..ee3db939 100644 --- a/letta/services/file_manager.py +++ b/letta/services/file_manager.py @@ -91,18 +91,17 @@ class FileManager: await session.rollback() return await self.get_file_by_id(file_metadata.id, actor=actor) - # TODO: We make actor optional for now, but should most likely be enforced due to security reasons @enforce_types @raise_on_invalid_id(param_name="file_id", expected_prefix=PrimitiveType.FILE) @trace_method # @async_redis_cache( - # key_func=lambda self, file_id, actor=None, include_content=False, strip_directory_prefix=False: f"{file_id}:{actor.organization_id if actor else 
'none'}:{include_content}:{strip_directory_prefix}", + # key_func=lambda self, file_id, actor, include_content=False, strip_directory_prefix=False: f"{file_id}:{actor.organization_id}:{include_content}:{strip_directory_prefix}", # prefix="file_content", # ttl_s=3600, # model_class=PydanticFileMetadata, # ) async def get_file_by_id( - self, file_id: str, actor: Optional[PydanticUser] = None, *, include_content: bool = False, strip_directory_prefix: bool = False + self, file_id: str, actor: PydanticUser, *, include_content: bool = False, strip_directory_prefix: bool = False ) -> Optional[PydanticFileMetadata]: """Retrieve a file by its ID. @@ -479,7 +478,7 @@ class FileManager: async def delete_file(self, file_id: str, actor: PydanticUser) -> PydanticFileMetadata: """Delete a file by its ID.""" async with db_registry.async_session() as session: - file = await FileMetadataModel.read_async(db_session=session, identifier=file_id) + file = await FileMetadataModel.read_async(db_session=session, identifier=file_id, actor=actor) # invalidate cache for this file before deletion await self._invalidate_file_caches(file_id, actor, file.original_file_name, file.source_id) diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py index 542b6501..1bcf6683 100644 --- a/letta/services/helpers/agent_manager_helper.py +++ b/letta/services/helpers/agent_manager_helper.py @@ -1194,9 +1194,9 @@ async def build_agent_passage_query( """ # Handle embedding for vector search + # If embed_query is True but no embedding_config, fall through to text search embedded_text = None - if embed_query: - assert embedding_config is not None, "embedding_config must be specified for vector search" + if embed_query and embedding_config is not None: assert query_text is not None, "query_text must be specified for vector search" # Use the new LLMClient for embeddings diff --git a/letta/services/llm_batch_manager.py b/letta/services/llm_batch_manager.py index 
6c09c2be..d544adf3 100644 --- a/letta/services/llm_batch_manager.py +++ b/letta/services/llm_batch_manager.py @@ -63,7 +63,7 @@ class LLMBatchManager: self, llm_batch_id: str, status: JobStatus, - actor: Optional[PydanticUser] = None, + actor: PydanticUser, latest_polling_response: Optional[BetaMessageBatch] = None, ) -> PydanticLLMBatchJob: """Update a batch job’s status and optionally its polling response.""" @@ -107,8 +107,8 @@ class LLMBatchManager: async def list_llm_batch_jobs_async( self, letta_batch_id: str, + actor: PydanticUser, limit: Optional[int] = None, - actor: Optional[PydanticUser] = None, after: Optional[str] = None, ) -> List[PydanticLLMBatchJob]: """ @@ -153,8 +153,8 @@ class LLMBatchManager: async def get_messages_for_letta_batch_async( self, letta_batch_job_id: str, + actor: PydanticUser, limit: int = 100, - actor: Optional[PydanticUser] = None, agent_id: Optional[str] = None, sort_descending: bool = True, cursor: Optional[str] = None, # Message ID as cursor diff --git a/letta/services/mcp_manager.py b/letta/services/mcp_manager.py index 173c909e..0f2ad4a8 100644 --- a/letta/services/mcp_manager.py +++ b/letta/services/mcp_manager.py @@ -419,6 +419,9 @@ class MCPManager: """ # Create base MCPServer object if isinstance(server_config, StdioServerConfig): + # Check if stdio MCP servers are disabled (not suitable for multi-tenant deployments) + if tool_settings.mcp_disable_stdio: + raise ValueError("MCP stdio servers are disabled. 
Set MCP_DISABLE_STDIO=false to enable them.") mcp_server = MCPServer(server_name=server_config.server_name, server_type=server_config.type, stdio_config=server_config) elif isinstance(server_config, SSEServerConfig): mcp_server = MCPServer( @@ -832,6 +835,9 @@ class MCPManager: server_config = SSEServerConfig(**server_config.model_dump()) return AsyncFastMCPSSEClient(server_config=server_config, oauth=oauth, agent_id=agent_id) elif server_config.type == MCPServerType.STDIO: + # Check if stdio MCP servers are disabled (not suitable for multi-tenant deployments) + if tool_settings.mcp_disable_stdio: + raise ValueError("MCP stdio servers are disabled. Set MCP_DISABLE_STDIO=false to enable them.") server_config = StdioServerConfig(**server_config.model_dump()) return AsyncStdioMCPClient(server_config=server_config, oauth_provider=None, agent_id=agent_id) elif server_config.type == MCPServerType.STREAMABLE_HTTP: diff --git a/letta/services/mcp_server_manager.py b/letta/services/mcp_server_manager.py index 8bd8534d..f1981a03 100644 --- a/letta/services/mcp_server_manager.py +++ b/letta/services/mcp_server_manager.py @@ -516,6 +516,9 @@ class MCPServerManager: """ # Create base MCPServer object if isinstance(server_config, StdioServerConfig): + # Check if stdio MCP servers are disabled (not suitable for multi-tenant deployments) + if tool_settings.mcp_disable_stdio: + raise ValueError("MCP stdio servers are disabled. 
Set MCP_DISABLE_STDIO=false to enable them.") mcp_server = MCPServer(server_name=server_config.server_name, server_type=server_config.type, stdio_config=server_config) elif isinstance(server_config, SSEServerConfig): mcp_server = MCPServer( @@ -1003,6 +1006,9 @@ class MCPServerManager: server_config = SSEServerConfig(**server_config.model_dump()) return AsyncFastMCPSSEClient(server_config=server_config, oauth=oauth, agent_id=agent_id) elif server_config.type == MCPServerType.STDIO: + # Check if stdio MCP servers are disabled (not suitable for multi-tenant deployments) + if tool_settings.mcp_disable_stdio: + raise ValueError("MCP stdio servers are disabled. Set MCP_DISABLE_STDIO=false to enable them.") server_config = StdioServerConfig(**server_config.model_dump()) return AsyncStdioMCPClient(server_config=server_config, oauth_provider=None, agent_id=agent_id) elif server_config.type == MCPServerType.STREAMABLE_HTTP: diff --git a/letta/services/passage_manager.py b/letta/services/passage_manager.py index 99781b55..0a69e70e 100644 --- a/letta/services/passage_manager.py +++ b/letta/services/passage_manager.py @@ -8,7 +8,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import noload from letta.constants import MAX_EMBEDDING_DIM -from letta.errors import EmbeddingConfigRequiredError from letta.helpers.decorators import async_redis_cache from letta.llm_api.llm_client import LLMClient from letta.log import get_logger @@ -193,6 +192,93 @@ class PassageManager: return passage.to_pydantic() + @enforce_types + @trace_method + async def create_agent_passages_async(self, pydantic_passages: List[PydanticPassage], actor: PydanticUser) -> List[PydanticPassage]: + """Create multiple agent passages in a single database transaction. 
+ + Args: + pydantic_passages: List of passages to create + actor: User performing the operation + + Returns: + List of created passages + """ + if not pydantic_passages: + return [] + + import numpy as np + + from letta.helpers.tpuf_client import should_use_tpuf + + use_tpuf = should_use_tpuf() + passage_objects: List[ArchivalPassage] = [] + all_tags_data: List[tuple] = [] # (passage_index, tags) for creating tags after passages are created + + for idx, pydantic_passage in enumerate(pydantic_passages): + if not pydantic_passage.archive_id: + raise ValueError("Agent passage must have archive_id") + if pydantic_passage.source_id: + raise ValueError("Agent passage cannot have source_id") + + data = pydantic_passage.model_dump(to_orm=True) + + # Deduplicate tags if provided (for dual storage consistency) + tags = data.get("tags") + if tags: + tags = list(set(tags)) + all_tags_data.append((idx, tags)) + + # Pad embeddings to MAX_EMBEDDING_DIM for pgvector (only when using Postgres as vector DB) + embedding = data["embedding"] + if embedding and not use_tpuf: + np_embedding = np.array(embedding) + if np_embedding.shape[0] != MAX_EMBEDDING_DIM: + embedding = np.pad(np_embedding, (0, MAX_EMBEDDING_DIM - np_embedding.shape[0]), mode="constant").tolist() + + # Sanitize text to remove null bytes which PostgreSQL rejects + text = data["text"] + if text and "\x00" in text: + text = text.replace("\x00", "") + logger.warning(f"Removed null bytes from passage text (length: {len(data['text'])} -> {len(text)})") + + common_fields = { + "id": data.get("id"), + "text": text, + "embedding": embedding, + "embedding_config": data["embedding_config"], + "organization_id": data["organization_id"], + "metadata_": data.get("metadata_", {}), + "tags": tags, + "is_deleted": data.get("is_deleted", False), + "created_at": data.get("created_at", datetime.now(timezone.utc)), + } + agent_fields = {"archive_id": data["archive_id"]} + passage = ArchivalPassage(**common_fields, **agent_fields) + 
passage_objects.append(passage) + + async with db_registry.async_session() as session: + # Batch create all passages in a single transaction + created_passages = await ArchivalPassage.batch_create_async( + items=passage_objects, + db_session=session, + actor=actor, + ) + + # Create tags for passages that have them + for idx, tags in all_tags_data: + created_passage = created_passages[idx] + await self._create_tags_for_passage( + session=session, + passage_id=created_passage.id, + archive_id=created_passage.archive_id, + organization_id=created_passage.organization_id, + tags=tags, + actor=actor, + ) + + return [p.to_pydantic() for p in created_passages] + @enforce_types @trace_method async def create_source_passage_async( @@ -474,15 +560,6 @@ class PassageManager: Returns: List of created passage objects """ - if agent_state.embedding_config is None: - raise EmbeddingConfigRequiredError(agent_id=agent_state.id, operation="insert_passage") - - embedding_chunk_size = agent_state.embedding_config.embedding_chunk_size - embedding_client = LLMClient.create( - provider_type=agent_state.embedding_config.embedding_endpoint_type, - actor=actor, - ) - # Get or create the default archive for the agent archive = await self.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=agent_state, actor=actor) @@ -493,8 +570,16 @@ class PassageManager: return [] try: - # Generate embeddings for all chunks using the new async API - embeddings = await embedding_client.request_embeddings(text_chunks, agent_state.embedding_config) + # Generate embeddings if embedding config is available + if agent_state.embedding_config is not None: + embedding_client = LLMClient.create( + provider_type=agent_state.embedding_config.embedding_endpoint_type, + actor=actor, + ) + embeddings = await embedding_client.request_embeddings(text_chunks, agent_state.embedding_config) + else: + # No embedding config - store passages without embeddings (text search only) + embeddings = [None] * 
len(text_chunks) passages = [] @@ -525,20 +610,21 @@ class PassageManager: tpuf_client = TurbopufferClient() - # Extract IDs and texts from the created passages + # Extract IDs, texts, and embeddings from the created passages passage_ids = [p.id for p in passages] passage_texts = [p.text for p in passages] + passage_embeddings = [p.embedding for p in passages] - # Insert to Turbopuffer with the same IDs as SQL - # TurbopufferClient will generate embeddings internally using default config + # Insert to Turbopuffer with the same IDs as SQL, reusing existing embeddings await tpuf_client.insert_archival_memories( archive_id=archive.id, text_chunks=passage_texts, - passage_ids=passage_ids, # Use same IDs as SQL + passage_ids=passage_ids, organization_id=actor.organization_id, actor=actor, tags=tags, created_at=passages[0].created_at if passages else None, + embeddings=passage_embeddings, ) except Exception as e: logger.error(f"Failed to insert passages to Turbopuffer: {e}") diff --git a/letta/services/provider_manager.py b/letta/services/provider_manager.py index 3757e5fc..1556ecef 100644 --- a/letta/services/provider_manager.py +++ b/letta/services/provider_manager.py @@ -98,9 +98,35 @@ class ProviderManager: deleted_provider.access_key_enc = access_key_secret.get_encrypted() await deleted_provider.update_async(session, actor=actor) + + # Also restore any soft-deleted models associated with this provider + # This is needed because the unique constraint on provider_models doesn't include is_deleted, + # so soft-deleted models would block creation of new models with the same handle + from sqlalchemy import update + + restore_models_stmt = ( + update(ProviderModelORM) + .where( + and_( + ProviderModelORM.provider_id == deleted_provider.id, + ProviderModelORM.is_deleted == True, + ) + ) + .values(is_deleted=False) + ) + result = await session.execute(restore_models_stmt) + if result.rowcount > 0: + logger.info(f"Restored {result.rowcount} soft-deleted model(s) for provider 
'{request.name}'") + + # Commit the provider and model restoration before syncing + # This is needed because _sync_default_models_for_provider opens a new session + # that can't see uncommitted changes from this session + await session.commit() + provider_pydantic = deleted_provider.to_pydantic() # For BYOK providers, automatically sync available models + # This will add any new models and remove any that are no longer available if is_byok: await self._sync_default_models_for_provider(provider_pydantic, actor) @@ -119,6 +145,14 @@ class ProviderManager: # if provider.name == provider.provider_type.value: # raise ValueError("Provider name must be unique and different from provider type") + # Fill in schema-default base_url if not provided + # This ensures providers like ZAI get their default endpoint persisted to DB + # rather than relying on cast_to_subtype() at read time + if provider.base_url is None: + typed_provider = provider.cast_to_subtype() + if typed_provider.base_url is not None: + provider.base_url = typed_provider.base_url + # Only assign organization id for non-base providers # Base providers should be globally accessible (org_id = None) if is_byok: @@ -201,6 +235,21 @@ class ProviderManager: await existing_provider.update_async(session, actor=actor) return existing_provider.to_pydantic() + @enforce_types + @raise_on_invalid_id(param_name="provider_id", expected_prefix=PrimitiveType.PROVIDER) + async def update_provider_last_synced_async(self, provider_id: str, actor: Optional[PydanticUser] = None) -> None: + """Update the last_synced timestamp for a provider. + + Note: actor is optional to support system-level operations (e.g., during server initialization + for global providers). When actor is provided, org-scoping is enforced. 
+ """ + from datetime import datetime, timezone + + async with db_registry.async_session() as session: + provider = await ProviderModel.read_async(db_session=session, identifier=provider_id, actor=actor) + provider.last_synced = datetime.now(timezone.utc) + await session.commit() + @enforce_types @raise_on_invalid_id(param_name="provider_id", expected_prefix=PrimitiveType.PROVIDER) @trace_method @@ -476,81 +525,19 @@ class ProviderManager: async def _sync_default_models_for_provider(self, provider: PydanticProvider, actor: PydanticUser) -> None: """Sync models for a newly created BYOK provider by querying the provider's API.""" - from letta.log import get_logger - - logger = get_logger(__name__) - try: - # Get the provider class and create an instance - from letta.schemas.enums import ProviderType - from letta.schemas.providers.anthropic import AnthropicProvider - from letta.schemas.providers.azure import AzureProvider - from letta.schemas.providers.bedrock import BedrockProvider - from letta.schemas.providers.google_gemini import GoogleAIProvider - from letta.schemas.providers.groq import GroqProvider - from letta.schemas.providers.ollama import OllamaProvider - from letta.schemas.providers.openai import OpenAIProvider - from letta.schemas.providers.zai import ZAIProvider + # Use cast_to_subtype() which properly handles all provider types and preserves api_key_enc + typed_provider = provider.cast_to_subtype() + llm_models = await typed_provider.list_llm_models_async() + embedding_models = await typed_provider.list_embedding_models_async() - # ChatGPT OAuth requires cast_to_subtype to preserve api_key_enc and id - # (needed for OAuth token refresh and database persistence) - if provider.provider_type == ProviderType.chatgpt_oauth: - provider_instance = provider.cast_to_subtype() - else: - provider_type_to_class = { - "openai": OpenAIProvider, - "anthropic": AnthropicProvider, - "groq": GroqProvider, - "google": GoogleAIProvider, - "ollama": OllamaProvider, - 
"bedrock": BedrockProvider, - "azure": AzureProvider, - "zai": ZAIProvider, - } - - provider_type = provider.provider_type.value if hasattr(provider.provider_type, "value") else str(provider.provider_type) - provider_class = provider_type_to_class.get(provider_type) - - if not provider_class: - logger.warning(f"No provider class found for type '{provider_type}'") - return - - # Create provider instance with necessary parameters - api_key = await provider.api_key_enc.get_plaintext_async() if provider.api_key_enc else None - access_key = await provider.access_key_enc.get_plaintext_async() if provider.access_key_enc else None - kwargs = { - "name": provider.name, - "api_key": api_key, - "provider_category": provider.provider_category, - } - if provider.base_url: - kwargs["base_url"] = provider.base_url - if access_key: - kwargs["access_key"] = access_key - if provider.region: - kwargs["region"] = provider.region - if provider.api_version: - kwargs["api_version"] = provider.api_version - - provider_instance = provider_class(**kwargs) - - # Query the provider's API for available models - llm_models = await provider_instance.list_llm_models_async() - embedding_models = await provider_instance.list_embedding_models_async() - - # Update handles and provider_name for BYOK providers - for model in llm_models: - model.provider_name = provider.name - model.handle = f"{provider.name}/{model.model}" - model.provider_category = provider.provider_category - - for model in embedding_models: - model.handle = f"{provider.name}/{model.embedding_model}" - - # Use existing sync_provider_models_async to save to database await self.sync_provider_models_async( - provider=provider, llm_models=llm_models, embedding_models=embedding_models, organization_id=actor.organization_id + provider=provider, + llm_models=llm_models, + embedding_models=embedding_models, + organization_id=actor.organization_id, ) + await self.update_provider_last_synced_async(provider.id, actor=actor) except Exception as 
e: logger.error(f"Failed to sync models for provider '{provider.name}': {e}") @@ -713,7 +700,7 @@ class ProviderManager: enabled=True, model_endpoint_type=llm_config.model_endpoint_type, max_context_window=llm_config.context_window, - supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"], + supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"], supports_tool_calling=True, # Assume true for LLMs for now ) @@ -737,14 +724,27 @@ class ProviderManager: # Roll back the session to clear the failed transaction await session.rollback() else: - # Check if max_context_window needs to be updated + # Check if max_context_window or model_endpoint_type needs to be updated existing_model = existing[0] + needs_update = False + if existing_model.max_context_window != llm_config.context_window: logger.info( f" Updating LLM model {llm_config.handle} max_context_window: " f"{existing_model.max_context_window} -> {llm_config.context_window}" ) existing_model.max_context_window = llm_config.context_window + needs_update = True + + if existing_model.model_endpoint_type != llm_config.model_endpoint_type: + logger.info( + f" Updating LLM model {llm_config.handle} model_endpoint_type: " + f"{existing_model.model_endpoint_type} -> {llm_config.model_endpoint_type}" + ) + existing_model.model_endpoint_type = llm_config.model_endpoint_type + needs_update = True + + if needs_update: await existing_model.update_async(session) else: logger.info(f" LLM model {llm_config.handle} already exists (ID: {existing[0].id}), skipping") @@ -801,7 +801,17 @@ class ProviderManager: # Roll back the session to clear the failed transaction await session.rollback() else: - logger.info(f" Embedding model {embedding_config.handle} already exists (ID: {existing[0].id}), skipping") + # Check if model_endpoint_type needs to be updated + existing_model = existing[0] + if existing_model.model_endpoint_type != 
embedding_config.embedding_endpoint_type: + logger.info( + f" Updating embedding model {embedding_config.handle} model_endpoint_type: " + f"{existing_model.model_endpoint_type} -> {embedding_config.embedding_endpoint_type}" + ) + existing_model.model_endpoint_type = embedding_config.embedding_endpoint_type + await existing_model.update_async(session) + else: + logger.info(f" Embedding model {embedding_config.handle} already exists (ID: {existing[0].id}), skipping") @enforce_types @trace_method @@ -972,8 +982,8 @@ class ProviderManager: # Determine the model endpoint - use provider's base_url if set, # otherwise use provider-specific defaults - if provider.base_url: - model_endpoint = provider.base_url + if typed_provider.base_url: + model_endpoint = typed_provider.base_url elif provider.provider_type == ProviderType.chatgpt_oauth: # ChatGPT OAuth uses the ChatGPT backend API, not a generic endpoint pattern from letta.schemas.providers.chatgpt_oauth import CHATGPT_CODEX_ENDPOINT diff --git a/letta/services/provider_trace_backends/postgres.py b/letta/services/provider_trace_backends/postgres.py index 9980cec9..a70eadf8 100644 --- a/letta/services/provider_trace_backends/postgres.py +++ b/letta/services/provider_trace_backends/postgres.py @@ -2,10 +2,12 @@ from letta.helpers.json_helpers import json_dumps, json_loads from letta.orm.provider_trace import ProviderTrace as ProviderTraceModel -from letta.schemas.provider_trace import ProviderTrace +from letta.orm.provider_trace_metadata import ProviderTraceMetadata as ProviderTraceMetadataModel +from letta.schemas.provider_trace import ProviderTrace, ProviderTraceMetadata from letta.schemas.user import User from letta.server.db import db_registry from letta.services.provider_trace_backends.base import ProviderTraceBackendClient +from letta.settings import telemetry_settings class PostgresProviderTraceBackend(ProviderTraceBackendClient): @@ -15,7 +17,17 @@ class PostgresProviderTraceBackend(ProviderTraceBackendClient): 
self, actor: User, provider_trace: ProviderTrace, + ) -> ProviderTrace | ProviderTraceMetadata: + if telemetry_settings.provider_trace_pg_metadata_only: + return await self._create_metadata_only_async(actor, provider_trace) + return await self._create_full_async(actor, provider_trace) + + async def _create_full_async( + self, + actor: User, + provider_trace: ProviderTrace, ) -> ProviderTrace: + """Write full provider trace to provider_traces table.""" async with db_registry.async_session() as session: provider_trace_model = ProviderTraceModel(**provider_trace.model_dump()) provider_trace_model.organization_id = actor.organization_id @@ -31,11 +43,44 @@ class PostgresProviderTraceBackend(ProviderTraceBackendClient): await provider_trace_model.create_async(session, actor=actor, no_commit=True, no_refresh=True) return provider_trace_model.to_pydantic() + async def _create_metadata_only_async( + self, + actor: User, + provider_trace: ProviderTrace, + ) -> ProviderTraceMetadata: + """Write metadata-only trace to provider_trace_metadata table.""" + metadata = ProviderTraceMetadata( + id=provider_trace.id, + step_id=provider_trace.step_id, + agent_id=provider_trace.agent_id, + agent_tags=provider_trace.agent_tags, + call_type=provider_trace.call_type, + run_id=provider_trace.run_id, + source=provider_trace.source, + org_id=provider_trace.org_id, + user_id=provider_trace.user_id, + ) + metadata_model = ProviderTraceMetadataModel(**metadata.model_dump()) + metadata_model.organization_id = actor.organization_id + + async with db_registry.async_session() as session: + await metadata_model.create_async(session, actor=actor, no_commit=True, no_refresh=True) + return metadata_model.to_pydantic() + async def get_by_step_id_async( self, step_id: str, actor: User, ) -> ProviderTrace | None: + """Read from provider_traces table. 
Always reads from full table regardless of write flag.""" + return await self._get_full_by_step_id_async(step_id, actor) + + async def _get_full_by_step_id_async( + self, + step_id: str, + actor: User, + ) -> ProviderTrace | None: + """Read from provider_traces table.""" async with db_registry.async_session() as session: provider_trace_model = await ProviderTraceModel.read_async( db_session=session, @@ -43,3 +88,17 @@ class PostgresProviderTraceBackend(ProviderTraceBackendClient): actor=actor, ) return provider_trace_model.to_pydantic() if provider_trace_model else None + + async def _get_metadata_by_step_id_async( + self, + step_id: str, + actor: User, + ) -> ProviderTraceMetadata | None: + """Read from provider_trace_metadata table.""" + async with db_registry.async_session() as session: + metadata_model = await ProviderTraceMetadataModel.read_async( + db_session=session, + step_id=step_id, + actor=actor, + ) + return metadata_model.to_pydantic() if metadata_model else None diff --git a/letta/services/provider_trace_backends/socket.py b/letta/services/provider_trace_backends/socket.py index dfb4ef8e..1d375e57 100644 --- a/letta/services/provider_trace_backends/socket.py +++ b/letta/services/provider_trace_backends/socket.py @@ -17,7 +17,8 @@ logger = get_logger(__name__) # Protocol version for crouton communication. # Bump this when making breaking changes to the record schema. # Must match ProtocolVersion in apps/crouton/main.go. 
-PROTOCOL_VERSION = 1 +# v2: Added user_id, compaction_settings (summarization), llm_config (non-summarization) +PROTOCOL_VERSION = 2 class SocketProviderTraceBackend(ProviderTraceBackendClient): @@ -94,6 +95,11 @@ class SocketProviderTraceBackend(ProviderTraceBackendClient): "error": error, "error_type": error_type, "timestamp": datetime.now(timezone.utc).isoformat(), + # v2 protocol fields + "org_id": provider_trace.org_id, + "user_id": provider_trace.user_id, + "compaction_settings": provider_trace.compaction_settings, + "llm_config": provider_trace.llm_config, } # Fire-and-forget in background thread diff --git a/letta/services/run_manager.py b/letta/services/run_manager.py index be550734..c841a62d 100644 --- a/letta/services/run_manager.py +++ b/letta/services/run_manager.py @@ -455,9 +455,11 @@ class RunManager: # Dispatch callback outside of database session if needed if needs_callback: if refresh_result_messages: + # Defensive: ensure stop_reason is never None + stop_reason_value = pydantic_run.stop_reason if pydantic_run.stop_reason else StopReasonType.completed result = LettaResponse( messages=await self.get_run_messages(run_id=run_id, actor=actor), - stop_reason=LettaStopReason(stop_reason=pydantic_run.stop_reason), + stop_reason=LettaStopReason(stop_reason=stop_reason_value), usage=await self.get_run_usage(run_id=run_id, actor=actor), ) final_metadata["result"] = result.model_dump() @@ -719,7 +721,7 @@ class RunManager: ) # Use the standard function to create properly formatted approval response messages - approval_response_messages = create_approval_response_message_from_input( + approval_response_messages = await create_approval_response_message_from_input( agent_state=agent_state, input_message=approval_input, run_id=run_id, diff --git a/letta/services/sandbox_config_manager.py b/letta/services/sandbox_config_manager.py index c34611e3..30849870 100644 --- a/letta/services/sandbox_config_manager.py +++ b/letta/services/sandbox_config_manager.py @@ 
-167,9 +167,7 @@ class SandboxConfigManager: @enforce_types @trace_method - async def get_sandbox_config_by_type_async( - self, type: SandboxType, actor: Optional[PydanticUser] = None - ) -> Optional[PydanticSandboxConfig]: + async def get_sandbox_config_by_type_async(self, type: SandboxType, actor: PydanticUser) -> Optional[PydanticSandboxConfig]: """Retrieve a sandbox config by its type.""" async with db_registry.async_session() as session: try: @@ -345,7 +343,7 @@ class SandboxConfigManager: @raise_on_invalid_id(param_name="sandbox_config_id", expected_prefix=PrimitiveType.SANDBOX_CONFIG) @trace_method async def get_sandbox_env_var_by_key_and_sandbox_config_id_async( - self, key: str, sandbox_config_id: str, actor: Optional[PydanticUser] = None + self, key: str, sandbox_config_id: str, actor: PydanticUser ) -> Optional[PydanticEnvVar]: """Retrieve a sandbox environment variable by its key and sandbox_config_id.""" async with db_registry.async_session() as session: diff --git a/letta/services/source_manager.py b/letta/services/source_manager.py index b45c9128..6f1891e7 100644 --- a/letta/services/source_manager.py +++ b/letta/services/source_manager.py @@ -448,11 +448,10 @@ class SourceManager: return list(agent_ids) - # TODO: We make actor optional for now, but should most likely be enforced due to security reasons @enforce_types @raise_on_invalid_id(param_name="source_id", expected_prefix=PrimitiveType.SOURCE) @trace_method - async def get_source_by_id(self, source_id: str, actor: Optional[PydanticUser] = None) -> Optional[PydanticSource]: + async def get_source_by_id(self, source_id: str, actor: PydanticUser) -> Optional[PydanticSource]: """Retrieve a source by its ID.""" async with db_registry.async_session() as session: source = await SourceModel.read_async(db_session=session, identifier=source_id, actor=actor) diff --git a/letta/services/streaming_service.py b/letta/services/streaming_service.py index 29e54879..82057622 100644 --- 
a/letta/services/streaming_service.py +++ b/letta/services/streaming_service.py @@ -38,9 +38,11 @@ from letta.schemas.usage import LettaUsageStatistics from letta.schemas.user import User from letta.server.rest_api.redis_stream_manager import create_background_stream_processor, redis_sse_stream_generator from letta.server.rest_api.streaming_response import ( + RunCancelledException, StreamingResponseWithStatusCode, add_keepalive_to_stream, cancellation_aware_stream_wrapper, + get_cancellation_event_for_run, ) from letta.server.rest_api.utils import capture_sentry_exception from letta.services.run_manager import RunManager @@ -95,7 +97,9 @@ class StreamingService: # load agent and check eligibility agent = await self.server.agent_manager.get_agent_by_id_async( - agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"] + agent_id, + actor, + include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"], ) # Handle model override if specified in the request @@ -166,6 +170,7 @@ class StreamingService: run_manager=self.runs_manager, run_id=run.id, actor=actor, + cancellation_event=get_cancellation_event_for_run(run.id), ) safe_create_task( @@ -193,6 +198,7 @@ class StreamingService: run_manager=self.runs_manager, run_id=run.id, actor=actor, + cancellation_event=get_cancellation_event_for_run(run.id), ) # conditionally wrap with keepalive based on request parameter @@ -351,11 +357,11 @@ class StreamingService: ) async for chunk in stream: - # Track terminal events + # Track terminal events (check at line start to avoid false positives in message content) if isinstance(chunk, str): - if "data: [DONE]" in chunk: + if "\ndata: [DONE]" in chunk or chunk.startswith("data: [DONE]"): saw_done = True - if "event: error" in chunk: + if "\nevent: error" in chunk or chunk.startswith("event: error"): saw_error = True yield chunk @@ -449,17 +455,27 @@ class 
StreamingService: yield f"event: error\ndata: {error_message.model_dump_json()}\n\n" # Send [DONE] marker to properly close the stream yield "data: [DONE]\n\n" + except RunCancelledException as e: + # Run was explicitly cancelled - this is not an error + # The cancellation has already been handled by cancellation_aware_stream_wrapper + logger.info(f"Run {run_id} was cancelled, exiting stream gracefully") + # Send [DONE] to properly close the stream + yield "data: [DONE]\n\n" + # Don't update run status in finally - cancellation is already recorded + run_status = None # Signal to finally block to skip update except Exception as e: run_status = RunStatus.failed stop_reason = LettaStopReason(stop_reason=StopReasonType.error) + # Use repr() if str() is empty (happens with Exception() with no args) + error_detail = str(e) or repr(e) error_message = LettaErrorMessage( run_id=run_id, error_type="internal_error", message="An unknown error occurred with the LLM streaming request.", - detail=str(e), + detail=error_detail, ) error_data = {"error": error_message.model_dump()} - logger.error(f"Run {run_id} stopped with unknown error: {e}, error_data: {error_message.model_dump()}") + logger.error(f"Run {run_id} stopped with unknown error: {error_detail}, error_data: {error_message.model_dump()}") yield f"data: {stop_reason.model_dump_json()}\n\n" yield f"event: error\ndata: {error_message.model_dump_json()}\n\n" # Send [DONE] marker to properly close the stream @@ -500,11 +516,22 @@ class StreamingService: "groq", "deepseek", "chatgpt_oauth", + "minimax", + "openrouter", ] def _is_token_streaming_compatible(self, agent: AgentState) -> bool: """Check if agent's model supports token-level streaming.""" - base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek", "zai", "chatgpt_oauth"] + base_compatible = agent.llm_config.model_endpoint_type in [ + "anthropic", + "openai", + "bedrock", + "deepseek", + "zai", + "chatgpt_oauth", + 
"minimax", + "openrouter", + ] google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [ "google_ai", "google_vertex", diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py index d2674033..64e9f8ba 100644 --- a/letta/services/summarizer/summarizer.py +++ b/letta/services/summarizer/summarizer.py @@ -49,6 +49,8 @@ class Summarizer: message_manager: Optional[MessageManager] = None, actor: Optional[User] = None, agent_id: Optional[str] = None, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ): self.mode = mode @@ -64,6 +66,8 @@ class Summarizer: self.message_manager = message_manager self.actor = actor self.agent_id = agent_id + self.run_id = run_id + self.step_id = step_id @trace_method async def summarize( @@ -72,6 +76,8 @@ class Summarizer: new_letta_messages: List[Message], force: bool = False, clear: bool = False, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ) -> Tuple[List[Message], bool]: """ Summarizes or trims in_context_messages according to the chosen mode, @@ -81,6 +87,8 @@ class Summarizer: in_context_messages: The existing messages in the conversation's context. new_letta_messages: The newly added Letta messages (just appended). 
force: Force summarize even if the criteria is not met + run_id: Optional run ID for telemetry (overrides instance default) + step_id: Optional step ID for telemetry (overrides instance default) Returns: (updated_messages, summary_message) @@ -88,6 +96,9 @@ class Summarizer: summary_message: Optional summarization message that was created (could be appended to the conversation if desired) """ + effective_run_id = run_id if run_id is not None else self.run_id + effective_step_id = step_id if step_id is not None else self.step_id + if self.mode == SummarizationMode.STATIC_MESSAGE_BUFFER: return self._static_buffer_summarization( in_context_messages, @@ -101,6 +112,8 @@ class Summarizer: new_letta_messages, force=force, clear=clear, + run_id=effective_run_id, + step_id=effective_step_id, ) else: # Fallback or future logic @@ -124,6 +137,8 @@ class Summarizer: new_letta_messages: List[Message], force: bool = False, clear: bool = False, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ) -> Tuple[List[Message], bool]: """Summarization as implemented in the original MemGPT loop, but using message count instead of token count. Evict a partial amount of messages, and replace message[1] with a recursive summary. 
@@ -166,6 +181,8 @@ class Summarizer: agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor) # TODO if we do this via the "agent", then we can more easily allow toggling on the memory block version + from letta.settings import summarizer_settings + summary_message_str = await simple_summary( messages=messages_to_summarize, llm_config=agent_state.llm_config, @@ -173,6 +190,14 @@ class Summarizer: include_ack=True, agent_id=self.agent_id, agent_tags=agent_state.tags, + run_id=run_id if run_id is not None else self.run_id, + step_id=step_id if step_id is not None else self.step_id, + compaction_settings={ + "mode": str(summarizer_settings.mode.value), + "message_buffer_limit": summarizer_settings.message_buffer_limit, + "message_buffer_min": summarizer_settings.message_buffer_min, + "partial_evict_summarizer_percentage": summarizer_settings.partial_evict_summarizer_percentage, + }, ) # TODO add counts back @@ -432,6 +457,8 @@ async def simple_summary( agent_id: str | None = None, agent_tags: List[str] | None = None, run_id: str | None = None, + step_id: str | None = None, + compaction_settings: dict | None = None, ) -> str: """Generate a simple summary from a list of messages. 
@@ -454,7 +481,11 @@ async def simple_summary( agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, + step_id=step_id, call_type="summarization", + org_id=actor.organization_id if actor else None, + user_id=actor.id if actor else None, + compaction_settings=compaction_settings, ) # Prepare the messages payload to send to the LLM diff --git a/letta/services/summarizer/summarizer_all.py b/letta/services/summarizer/summarizer_all.py index 7918f89f..fc183214 100644 --- a/letta/services/summarizer/summarizer_all.py +++ b/letta/services/summarizer/summarizer_all.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from letta.log import get_logger from letta.otel.tracing import trace_method @@ -20,7 +20,11 @@ async def summarize_all( # Actual summarization configuration summarizer_config: CompactionSettings, in_context_messages: List[Message], - # new_messages: List[Message], + # Telemetry context + agent_id: Optional[str] = None, + agent_tags: Optional[List[str]] = None, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ) -> str: """ Summarize the entire conversation history into a single summary. 
@@ -60,6 +64,14 @@ async def summarize_all( actor=actor, include_ack=bool(summarizer_config.prompt_acknowledgement), prompt=summarizer_config.prompt, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + compaction_settings={ + "mode": "summarize_all", + "clip_chars": summarizer_config.clip_chars, + }, ) logger.info(f"Summarized {len(messages_to_summarize)} messages") diff --git a/letta/services/summarizer/summarizer_sliding_window.py b/letta/services/summarizer/summarizer_sliding_window.py index d38ca58e..87739393 100644 --- a/letta/services/summarizer/summarizer_sliding_window.py +++ b/letta/services/summarizer/summarizer_sliding_window.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List, Optional, Tuple from letta.helpers.message_helper import convert_message_creates_to_messages from letta.log import get_logger @@ -50,7 +50,11 @@ async def summarize_via_sliding_window( llm_config: LLMConfig, summarizer_config: CompactionSettings, in_context_messages: List[Message], - # new_messages: List[Message], + # Telemetry context + agent_id: Optional[str] = None, + agent_tags: Optional[List[str]] = None, + run_id: Optional[str] = None, + step_id: Optional[str] = None, ) -> Tuple[str, List[Message]]: """ If the total tokens is greater than the context window limit (or force=True), @@ -138,6 +142,17 @@ async def summarize_via_sliding_window( actor=actor, include_ack=bool(summarizer_config.prompt_acknowledgement), prompt=summarizer_config.prompt, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + compaction_settings={ + "mode": "sliding_window", + "messages_summarized": len(messages_to_summarize), + "messages_kept": total_message_count - assistant_message_index, + "sliding_window_percentage": summarizer_config.sliding_window_percentage, + "clip_chars": summarizer_config.clip_chars, + }, ) if summarizer_config.clip_chars is not None and len(summary_message_str) > 
summarizer_config.clip_chars: diff --git a/letta/services/tool_executor/core_tool_executor.py b/letta/services/tool_executor/core_tool_executor.py index 24fea8d1..043debba 100644 --- a/letta/services/tool_executor/core_tool_executor.py +++ b/letta/services/tool_executor/core_tool_executor.py @@ -11,6 +11,7 @@ from letta.constants import ( from letta.helpers.json_helpers import json_dumps from letta.helpers.tpuf_client import should_use_tpuf_for_messages from letta.log import get_logger +from letta.orm.errors import NoResultFound from letta.schemas.agent import AgentState from letta.schemas.block import BlockUpdate from letta.schemas.enums import MessageRole, TagMatchMode @@ -400,14 +401,11 @@ class LettaCoreToolExecutor(ToolExecutor): snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1]) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, f"a snippet of {path}", start_line + 1 - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += ( - "Review the changes and make sure they are as expected (correct indentation, " - "no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." ) # return None @@ -548,9 +546,10 @@ class LettaCoreToolExecutor(ToolExecutor): await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor) return ( - f"The core memory block with label `{label}` has been edited. 
" - "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). " - "Edit the memory block again if necessary." + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). " + f"Edit the memory block again if necessary." ) # Extended mode: parse codex-like patch operations for memory blocks @@ -691,7 +690,11 @@ class LettaCoreToolExecutor(ToolExecutor): else: raise ValueError(f"Unknown operation kind: {kind}") - return "Successfully applied memory patch operations:\n- " + "\n- ".join(results) + return ( + "Successfully applied memory patch operations. " + "Your system prompt has been recompiled with the updated memory contents and is now active in your context.\n\n" + "Operations completed:\n- " + "\n- ".join(results) + ) async def memory_insert( self, @@ -752,16 +755,11 @@ class LettaCoreToolExecutor(ToolExecutor): await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, - # "a snippet of the edited file", - # max(1, insert_line - SNIPPET_LINES + 1), - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += ( - "Review the changes and make sure they are as expected (correct indentation, " - "no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. 
" + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." ) return success_msg @@ -797,14 +795,11 @@ class LettaCoreToolExecutor(ToolExecutor): await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - # success_msg += self._make_output( - # snippet, f"a snippet of {path}", start_line + 1 - # ) - # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n" - success_msg += ( - "Review the changes and make sure they are as expected (correct indentation, " - "no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." ) # return None @@ -832,8 +827,14 @@ class LettaCoreToolExecutor(ToolExecutor): # Update the agent state with the updated memory from the database agent_state.memory = updated_agent_state.memory - return f"Successfully deleted memory block '{label}'" + return ( + f"Successfully deleted memory block '{label}'. " + f"Your system prompt has been recompiled without this memory block and is now active in your context." 
+ ) + except NoResultFound: + # Catch the specific error and re-raise with human-readable names + raise ValueError(f"Memory block '{label}' is not attached to agent '{agent_state.name}'") except Exception as e: return f"Error performing delete: {str(e)}" @@ -852,8 +853,14 @@ class LettaCoreToolExecutor(ToolExecutor): ) await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True) - return f"Successfully updated description of memory block '{label}'" + return ( + f"Successfully updated description of memory block '{label}'. " + f"Your system prompt has been recompiled with the updated description and is now active in your context." + ) + except NoResultFound: + # Catch the specific error and re-raise with human-readable names + raise ValueError(f"Memory block '{label}' not found for agent '{agent_state.name}'") except Exception as e: raise Exception(f"Error performing update_description: {str(e)}") @@ -872,8 +879,14 @@ class LettaCoreToolExecutor(ToolExecutor): await self.block_manager.update_block_async(block_id=memory_block.id, block_update=BlockUpdate(label=new_label), actor=actor) await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True) - return f"Successfully renamed memory block '{old_label}' to '{new_label}'" + return ( + f"Successfully renamed memory block '{old_label}' to '{new_label}'. " + f"Your system prompt has been recompiled with the renamed memory block and is now active in your context." 
+ ) + except NoResultFound: + # Catch the specific error and re-raise with human-readable names + raise ValueError(f"Memory block '{old_label}' not found for agent '{agent_state.name}'") except Exception as e: raise Exception(f"Error performing rename: {str(e)}") @@ -883,7 +896,7 @@ class LettaCoreToolExecutor(ToolExecutor): """Create a memory block by setting its value to an empty string.""" from letta.schemas.block import Block - label = path.removeprefix("/memories/").removeprefix("/").replace("/", "_") + label = path.removeprefix("/memories/").removeprefix("/") # Create a new block and persist it to the database new_block = Block(label=label, value=file_text if file_text else "", description=description) @@ -896,11 +909,14 @@ class LettaCoreToolExecutor(ToolExecutor): agent_state.memory.set_block(persisted_block) await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor) - return f"Successfully created memory block '{label}'" + return ( + f"Successfully created memory block '{label}'. " + f"Your system prompt has been recompiled with the new memory block and is now active in your context." + ) async def memory_str_replace(self, agent_state: AgentState, actor: User, path: str, old_str: str, new_str: str) -> str: """Replace text in a memory block.""" - label = path.removeprefix("/memories/").removeprefix("/").replace("/", "_") + label = path.removeprefix("/memories/").removeprefix("/") memory_block = agent_state.memory.get_block(label) if memory_block is None: @@ -953,10 +969,11 @@ class LettaCoreToolExecutor(ToolExecutor): await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - success_msg += ( - "Review the changes and make sure they are as expected (correct indentation, " - "no duplicate lines, etc). 
Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." ) return success_msg @@ -1019,10 +1036,11 @@ class LettaCoreToolExecutor(ToolExecutor): await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True) # Prepare the success message - success_msg = f"The core memory block with label `{label}` has been edited. " - success_msg += ( - "Review the changes and make sure they are as expected (correct indentation, " - "no duplicate lines, etc). Edit the memory block again if necessary." + success_msg = ( + f"The core memory block with label `{label}` has been successfully edited. " + f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. " + f"Review the changes and make sure they are as expected (correct indentation, " + f"no duplicate lines, etc). Edit the memory block again if necessary." 
) return success_msg diff --git a/letta/services/tool_executor/tool_execution_sandbox.py b/letta/services/tool_executor/tool_execution_sandbox.py index bc35618b..48b52fe8 100644 --- a/letta/services/tool_executor/tool_execution_sandbox.py +++ b/letta/services/tool_executor/tool_execution_sandbox.py @@ -533,6 +533,24 @@ class ToolExecutionSandbox: code += "\n" + self.tool.source_code + "\n" + if self.args: + raw_args = ", ".join([f"{name!r}: {name}" for name in self.args]) + code += f"__letta_raw_args = {{{raw_args}}}\n" + code += "try:\n" + code += " from letta.functions.ast_parsers import coerce_dict_args_by_annotations\n" + code += f" __letta_func = {self.tool.name}\n" + code += " __letta_annotations = getattr(__letta_func, '__annotations__', {})\n" + code += " __letta_coerced_args = coerce_dict_args_by_annotations(\n" + code += " __letta_raw_args,\n" + code += " __letta_annotations,\n" + code += " allow_unsafe_eval=True,\n" + code += " extra_globals=__letta_func.__globals__,\n" + code += " )\n" + for name in self.args: + code += f" {name} = __letta_coerced_args.get({name!r}, {name})\n" + code += "except Exception:\n" + code += " pass\n" + # TODO: handle wrapped print code += ( diff --git a/letta/services/tool_manager.py b/letta/services/tool_manager.py index 9a1e10a3..0b46d2e9 100644 --- a/letta/services/tool_manager.py +++ b/letta/services/tool_manager.py @@ -167,6 +167,19 @@ def modal_tool_wrapper(tool: PydanticTool, actor: PydanticUser, sandbox_env_vars if "agent_state" in tool_func.__code__.co_varnames: kwargs["agent_state"] = reconstructed_agent_state + try: + from letta.functions.ast_parsers import coerce_dict_args_by_annotations + + annotations = getattr(tool_func, "__annotations__", {}) + kwargs = coerce_dict_args_by_annotations( + kwargs, + annotations, + allow_unsafe_eval=True, + extra_globals=tool_func.__globals__, + ) + except Exception: + pass + # Execute the tool function (async or sync) if is_async: result = asyncio.run(tool_func(**kwargs)) @@ 
-931,13 +944,28 @@ class ToolManager: # Track if we need to check name uniqueness (check is done inside session with lock) needs_name_conflict_check = new_name != current_tool.name - # NOTE: EXTREMELEY HACKY, we need to stop making assumptions about the source_code - if "source_code" in update_data and f"def {new_name}" not in update_data.get("source_code", ""): - raise LettaToolNameSchemaMismatchError( - tool_name=new_name, - json_schema_name=new_schema.get("name") if new_schema else None, - source_code=update_data.get("source_code"), - ) + # Definitive checker for source code type + if "source_code" in update_data: + source_code = update_data.get("source_code", "") + source_type = update_data.get("source_type", current_tool.source_type) + + # Check for function name based on source type + if source_type == "typescript": + # TypeScript: check for "function name" or "export function name" + if f"function {new_name}" not in source_code: + raise LettaToolNameSchemaMismatchError( + tool_name=new_name, + json_schema_name=new_schema.get("name") if new_schema else None, + source_code=source_code, + ) + else: + # Python: check for "def name" + if f"def {new_name}" not in source_code: + raise LettaToolNameSchemaMismatchError( + tool_name=new_name, + json_schema_name=new_schema.get("name") if new_schema else None, + source_code=source_code, + ) # Create a preview of the updated tool by merging current tool with updates # This allows us to compute the hash before the database session diff --git a/letta/services/tool_sandbox/base.py b/letta/services/tool_sandbox/base.py index 14ded2cb..9b290f8c 100644 --- a/letta/services/tool_sandbox/base.py +++ b/letta/services/tool_sandbox/base.py @@ -259,6 +259,27 @@ class AsyncToolSandboxBase(ABC): if tool_source_code: lines.append(tool_source_code.rstrip()) + if self.args: + raw_args = ", ".join([f"{name!r}: {name}" for name in self.args]) + lines.extend( + [ + f"__letta_raw_args = {{{raw_args}}}", + "try:", + " from 
letta.functions.ast_parsers import coerce_dict_args_by_annotations", + f" __letta_func = {self.tool.name}", + " __letta_annotations = getattr(__letta_func, '__annotations__', {})", + " __letta_coerced_args = coerce_dict_args_by_annotations(", + " __letta_raw_args,", + " __letta_annotations,", + " allow_unsafe_eval=True,", + " extra_globals=__letta_func.__globals__,", + " )", + ] + ) + for name in self.args: + lines.append(f" {name} = __letta_coerced_args.get({name!r}, {name})") + lines.extend(["except Exception:", " pass"]) + if not self.is_async_function: # sync variant lines.append(f"_function_result = {invoke_function_call}") diff --git a/letta/services/user_manager.py b/letta/services/user_manager.py index e9b6582a..d3c87383 100644 --- a/letta/services/user_manager.py +++ b/letta/services/user_manager.py @@ -120,16 +120,19 @@ class UserManager: NoResultFound: If actor_id is None and no_default_actor setting is True. """ # Security check: if no_default_actor is enabled and actor_id is None, raise error - if settings.no_default_actor and actor_id is None: - raise NoResultFound("Actor ID is required when no_default_actor is enabled") + if settings.no_default_actor and (actor_id is None or actor_id == self.DEFAULT_USER_ID): + raise NoResultFound("Non-default Actor ID is required when no_default_actor is enabled") target_id = actor_id or self.DEFAULT_USER_ID try: return await self.get_actor_by_id_async(target_id) except NoResultFound: - user = await self.create_default_actor_async(org_id=DEFAULT_ORG_ID) - return user + if not settings.no_default_actor: + user = await self.create_default_actor_async(org_id=DEFAULT_ORG_ID) + return user + else: + raise @enforce_types @trace_method diff --git a/letta/settings.py b/letta/settings.py index 365b9d95..f52fca3c 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -37,7 +37,16 @@ class ToolSettings(BaseSettings): mcp_list_tools_timeout: float = 30.0 mcp_execute_tool_timeout: float = 60.0 mcp_read_from_config: bool = 
False # if False, will throw if attempting to read/write from file - mcp_disable_stdio: bool = False + mcp_disable_stdio: bool = Field( + default=True, + description=( + "Disable MCP stdio server type. When True (default), creating or connecting to " + "MCP servers using stdio transport will fail. Stdio MCP servers spawn local " + "processes, which is not suitable for multi-tenant or shared server deployments. " + "Set to False for local or single-user deployments where stdio-based MCP servers " + "are needed (e.g., running local tools via npx or uvx)." + ), + ) @property def modal_sandbox_enabled(self) -> bool: @@ -141,6 +150,9 @@ class ModelSettings(BaseSettings): # groq groq_api_key: Optional[str] = None + # minimax + minimax_api_key: Optional[str] = None + # Bedrock aws_access_key_id: Optional[str] = None aws_secret_access_key: Optional[str] = None @@ -189,6 +201,10 @@ class ModelSettings(BaseSettings): vllm_api_base: Optional[str] = None vllm_handle_base: Optional[str] = None + # SGLang + sglang_api_base: Optional[str] = None + sglang_handle_base: Optional[str] = None + # lmstudio lmstudio_base_url: Optional[str] = None @@ -503,6 +519,10 @@ class TelemetrySettings(BaseSettings): default=None, description="Source identifier for telemetry (memgpt-server, lettuce-py, etc.).", ) + provider_trace_pg_metadata_only: bool = Field( + default=False, + description="Write only metadata to Postgres (no request/response JSON). 
Requires provider_trace_metadata table to exist.", + ) @property def provider_trace_backends(self) -> list[str]: diff --git a/letta/validators.py b/letta/validators.py index 4b2f16ee..4e8552c5 100644 --- a/letta/validators.py +++ b/letta/validators.py @@ -44,6 +44,31 @@ def _create_path_validator_factory(primitive: str): PATH_VALIDATORS = {primitive_type.value: _create_path_validator_factory(primitive_type.value) for primitive_type in PrimitiveType} +def _create_conversation_id_or_default_path_validator_factory(): + """Conversation IDs accept the usual primitive format or the special value 'default'.""" + + primitive = PrimitiveType.CONVERSATION.value + prefix_pattern = PRIMITIVE_ID_PATTERNS[primitive].pattern + # Make the full regex accept either the primitive ID format or 'default'. + # `prefix_pattern` already contains the ^...$ anchors. + conversation_or_default_pattern = f"^(default|{prefix_pattern[1:-1]})$" + + def factory(): + return Path( + description=(f"The conversation identifier. Either the special value 'default' or an ID in the format '{primitive}-'"), + pattern=conversation_or_default_pattern, + examples=["default", f"{primitive}-123e4567-e89b-42d3-8456-426614174000"], + min_length=1, + max_length=len(primitive) + 1 + 36, + ) + + return factory + + +# Override conversation ID path validation to also allow the special value 'default'. 
+PATH_VALIDATORS[PrimitiveType.CONVERSATION.value] = _create_conversation_id_or_default_path_validator_factory() + + # Type aliases for common ID types # These can be used directly in route handler signatures for cleaner code AgentId = Annotated[str, PATH_VALIDATORS[PrimitiveType.AGENT.value]()] @@ -139,7 +164,6 @@ def _create_id_query_validator(primitive: str): Args: primitive: The primitive type prefix (e.g., "agent", "tool") - Returns: A Query validator with pattern matching """ @@ -162,6 +186,8 @@ RunIdQuery = Annotated[Optional[str], _create_id_query_validator(PrimitiveType.R JobIdQuery = Annotated[Optional[str], _create_id_query_validator(PrimitiveType.JOB.value)] GroupIdQuery = Annotated[Optional[str], _create_id_query_validator(PrimitiveType.GROUP.value)] IdentityIdQuery = Annotated[Optional[str], _create_id_query_validator(PrimitiveType.IDENTITY.value)] +UserIdQuery = Annotated[Optional[str], _create_id_query_validator(PrimitiveType.USER.value)] +UserIdQueryRequired = Annotated[str, _create_id_query_validator(PrimitiveType.USER.value)] # ============================================================================= diff --git a/pyproject.toml b/pyproject.toml index a81d9033..a6119383 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "letta" -version = "0.16.3" +version = "0.16.4" description = "Create LLM agents with long-term memory and custom tools" authors = [ {name = "Letta Team", email = "contact@letta.com"}, @@ -75,6 +75,8 @@ dependencies = [ "fastmcp>=2.12.5", "ddtrace>=4.2.1", "clickhouse-connect>=0.10.0", + "aiofiles>=24.1.0", + "async-lru>=2.0.5", ] [project.scripts] diff --git a/sandbox/modal_executor.py b/sandbox/modal_executor.py index 8ee22d09..2b759967 100644 --- a/sandbox/modal_executor.py +++ b/sandbox/modal_executor.py @@ -122,6 +122,19 @@ class ModalFunctionExecutor: if inject_agent_state: kwargs["agent_state"] = agent_state + try: + from letta.functions.ast_parsers import coerce_dict_args_by_annotations + 
+ annotations = getattr(func, "__annotations__", {}) + kwargs = coerce_dict_args_by_annotations( + kwargs, + annotations, + allow_unsafe_eval=True, + extra_globals=func.__globals__, + ) + except Exception: + pass + if is_async: result = asyncio.run(func(**kwargs)) else: diff --git a/tests/configs/llm_model_configs/sglang.json b/tests/configs/llm_model_configs/sglang.json new file mode 100644 index 00000000..c8467deb --- /dev/null +++ b/tests/configs/llm_model_configs/sglang.json @@ -0,0 +1,8 @@ +{ + "context_window": 8192, + "model_endpoint_type": "openai", + "provider_name": "sglang", + "model_endpoint": "http://127.0.0.1:30000/v1", + "model": "Qwen/Qwen2.5-0.5B-Instruct", + "put_inner_thoughts_in_kwargs": true +} diff --git a/tests/data/secret.png b/tests/data/secret.png new file mode 100644 index 00000000..c75d0884 Binary files /dev/null and b/tests/data/secret.png differ diff --git a/tests/integration_test_cancellation.py b/tests/integration_test_cancellation.py index 57f34e2d..6cc7a0bc 100644 --- a/tests/integration_test_cancellation.py +++ b/tests/integration_test_cancellation.py @@ -198,3 +198,8 @@ async def test_background_streaming_cancellation( response = await client.runs.messages.stream(run_id=run_id, starting_after=0) messages_from_stream = await accumulate_chunks(response) assert len(messages_from_stream) > 0 + + # Verify the stream contains stop_reason: cancelled (from our new cancellation logic) + stop_reasons = [msg for msg in messages_from_stream if hasattr(msg, "message_type") and msg.message_type == "stop_reason"] + assert len(stop_reasons) == 1, f"Expected exactly 1 stop_reason in stream, got {len(stop_reasons)}" + assert stop_reasons[0].stop_reason == "cancelled", f"Expected stop_reason 'cancelled', got '{stop_reasons[0].stop_reason}'" diff --git a/tests/integration_test_conversations_sdk.py b/tests/integration_test_conversations_sdk.py index c62a53c2..7899bf44 100644 --- a/tests/integration_test_conversations_sdk.py +++ 
b/tests/integration_test_conversations_sdk.py @@ -6,6 +6,7 @@ import uuid from time import sleep import pytest +import requests from letta_client import Letta @@ -565,3 +566,145 @@ class TestConversationsSDK: assert len(messages_after) < len(all_messages) # Should not contain the cursor message assert first_message_id not in [m.id for m in messages_after] + + +class TestConversationCompact: + """Tests for the conversation compact (summarization) endpoint.""" + + def test_compact_conversation_basic(self, client: Letta, agent, server_url: str): + """Test basic conversation compaction via the REST endpoint.""" + # Create a conversation + conversation = client.conversations.create(agent_id=agent.id) + + # Send multiple messages to create a history worth summarizing + for i in range(5): + list( + client.conversations.messages.create( + conversation_id=conversation.id, + messages=[{"role": "user", "content": f"Message {i}: Tell me about topic {i}."}], + ) + ) + + # Get initial message count + initial_messages = client.conversations.messages.list( + conversation_id=conversation.id, + order="asc", + ) + initial_count = len(initial_messages) + assert initial_count >= 10 # At least 5 user + 5 assistant messages + + # Call compact endpoint via REST + response = requests.post( + f"{server_url}/v1/conversations/{conversation.id}/compact", + json={}, + ) + assert response.status_code == 200, f"Expected 200, got {response.status_code}: {response.text}" + + result = response.json() + + # Verify the response structure + assert "summary" in result + assert "num_messages_before" in result + assert "num_messages_after" in result + assert isinstance(result["summary"], str) + assert len(result["summary"]) > 0 + assert result["num_messages_before"] > result["num_messages_after"] + + # Verify messages were actually compacted + compacted_messages = client.conversations.messages.list( + conversation_id=conversation.id, + order="asc", + ) + assert len(compacted_messages) < initial_count + + 
def test_compact_conversation_with_settings(self, client: Letta, agent, server_url: str): + """Test conversation compaction with custom compaction settings.""" + # Create a conversation with multiple messages + conversation = client.conversations.create(agent_id=agent.id) + + for i in range(5): + list( + client.conversations.messages.create( + conversation_id=conversation.id, + messages=[{"role": "user", "content": f"Remember fact {i}: The number {i} is important."}], + ) + ) + + # Call compact with 'all' mode + response = requests.post( + f"{server_url}/v1/conversations/{conversation.id}/compact", + json={ + "compaction_settings": { + "mode": "all", + } + }, + ) + assert response.status_code == 200, f"Expected 200, got {response.status_code}: {response.text}" + + result = response.json() + assert result["num_messages_before"] > result["num_messages_after"] + + def test_compact_conversation_preserves_conversation_isolation(self, client: Letta, agent, server_url: str): + """Test that compacting one conversation doesn't affect another.""" + # Create two conversations + conv1 = client.conversations.create(agent_id=agent.id) + conv2 = client.conversations.create(agent_id=agent.id) + + # Add messages to both + for i in range(5): + list( + client.conversations.messages.create( + conversation_id=conv1.id, + messages=[{"role": "user", "content": f"Conv1 message {i}"}], + ) + ) + list( + client.conversations.messages.create( + conversation_id=conv2.id, + messages=[{"role": "user", "content": f"Conv2 message {i}"}], + ) + ) + + # Get initial counts + conv1_initial = len(client.conversations.messages.list(conversation_id=conv1.id)) + conv2_initial = len(client.conversations.messages.list(conversation_id=conv2.id)) + + # Compact only conv1 + response = requests.post( + f"{server_url}/v1/conversations/{conv1.id}/compact", + json={}, + ) + assert response.status_code == 200 + + # Conv1 should be compacted + conv1_after = 
len(client.conversations.messages.list(conversation_id=conv1.id)) + assert conv1_after < conv1_initial + + # Conv2 should be unchanged + conv2_after = len(client.conversations.messages.list(conversation_id=conv2.id)) + assert conv2_after == conv2_initial + + def test_compact_conversation_empty_fails(self, client: Letta, agent, server_url: str): + """Test that compacting an empty conversation fails gracefully.""" + # Create a new conversation without messages + conversation = client.conversations.create(agent_id=agent.id) + + # Try to compact - should fail since no messages exist + response = requests.post( + f"{server_url}/v1/conversations/{conversation.id}/compact", + json={}, + ) + + # Should return 400 because there are no in-context messages + assert response.status_code == 400 + + def test_compact_conversation_invalid_id(self, client: Letta, agent, server_url: str): + """Test that compacting with invalid conversation ID returns 404.""" + fake_id = "conv-00000000-0000-0000-0000-000000000000" + + response = requests.post( + f"{server_url}/v1/conversations/{fake_id}/compact", + json={}, + ) + + assert response.status_code == 404 diff --git a/tests/integration_test_human_in_the_loop.py b/tests/integration_test_human_in_the_loop.py index 73584629..6024660f 100644 --- a/tests/integration_test_human_in_the_loop.py +++ b/tests/integration_test_human_in_the_loop.py @@ -1,3 +1,4 @@ +import asyncio import logging import uuid from typing import Any, List @@ -1333,3 +1334,116 @@ def test_agent_records_last_stop_reason_after_approval_flow( # Verify final agent state has the most recent stop reason final_agent = client.agents.retrieve(agent_id=agent.id) assert final_agent.last_stop_reason is not None + + +def test_approve_with_cancellation( + client: Letta, + agent: AgentState, +) -> None: + """ + Test that when approval and cancellation happen simultaneously, + the stream returns stop_reason: cancelled and stream_was_cancelled is set. 
+ """ + import threading + import time + + last_message_cursor = client.agents.messages.list(agent_id=agent.id, limit=1).items[0].id + + # Step 1: Send message that triggers approval request + response = client.agents.messages.create( + agent_id=agent.id, + messages=USER_MESSAGE_TEST_APPROVAL, + ) + tool_call_id = response.messages[-1].tool_call.tool_call_id + + # Step 2: Start cancellation in background thread + def cancel_after_delay(): + time.sleep(0.3) # Wait for stream to start + client.agents.messages.cancel(agent_id=agent.id) + + cancel_thread = threading.Thread(target=cancel_after_delay, daemon=True) + cancel_thread.start() + + # Step 3: Start approval stream (will be cancelled during processing) + response = client.agents.messages.stream( + agent_id=agent.id, + messages=[ + { + "type": "approval", + "approvals": [ + { + "type": "tool", + "tool_call_id": tool_call_id, + "tool_return": SECRET_CODE, + "status": "success", + }, + ], + }, + ], + streaming=True, + stream_tokens=True, + ) + + # Step 4: Accumulate chunks + messages = accumulate_chunks(response) + + # Step 5: Verify we got chunks AND a cancelled stop reason + assert len(messages) > 1, "Should receive at least some chunks before cancellation" + + # Find stop_reason in messages + stop_reasons = [msg for msg in messages if hasattr(msg, "message_type") and msg.message_type == "stop_reason"] + assert len(stop_reasons) == 1, f"Expected exactly 1 stop_reason, got {len(stop_reasons)}" + assert stop_reasons[0].stop_reason == "cancelled", f"Expected stop_reason 'cancelled', got '{stop_reasons[0].stop_reason}'" + + # Step 6: Verify run status is cancelled + runs = client.runs.list(agent_ids=[agent.id]) + latest_run = runs.items[0] + assert latest_run.status == "cancelled", f"Expected run status 'cancelled', got '{latest_run.status}'" + + # Wait for cancel thread to finish + cancel_thread.join(timeout=1.0) + + logger.info(f"✅ Test passed: approval with cancellation handled correctly, received {len(messages)} 
chunks") + + # Step 7: Verify that approval response message is persisted + messages = client.agents.messages.list(agent_id=agent.id, after=last_message_cursor).items + assert len(messages) > 0, "Should have persisted at least some messages before cancellation" + assert messages[-1].message_type == "tool_return_message", "Last message should be a tool return message" + last_message_cursor = messages[-1].id + + # Step 8: Attempt retry with same response + response = client.agents.messages.stream( + agent_id=agent.id, + messages=[ + { + "type": "approval", + "approvals": [ + { + "type": "tool", + "tool_call_id": tool_call_id, + "tool_return": SECRET_CODE, + "status": "success", + }, + ], + }, + ], + streaming=True, + stream_tokens=True, + ) + + # Step 9: Accumulate chunks + messages = accumulate_chunks(response) + + # Step 10: Verify we got chunks AND an end_turn stop reason + assert len(messages) > 1, "Should receive at least some chunks before cancellation" + + # Find stop_reason in messages + stop_reasons = [msg for msg in messages if hasattr(msg, "message_type") and msg.message_type == "stop_reason"] + assert len(stop_reasons) == 1, f"Expected exactly 1 stop_reason, got {len(stop_reasons)}" + assert stop_reasons[0].stop_reason == "end_turn", f"Expected stop_reason 'end_turn', got '{stop_reasons[0].stop_reason}'" + + # Step 11: Verify keep-alive message was sent + messages = client.agents.messages.list(agent_id=agent.id, after=last_message_cursor).items + assert len(messages) > 0, "Should have persisted new messages" + assert messages[0].message_type == "user_message", "First message should be a user message" + assert "keep-alive" in messages[0].content, f"Expected keep-alive message, got '{messages[0].content}'" diff --git a/tests/integration_test_multi_modal_tool_returns.py b/tests/integration_test_multi_modal_tool_returns.py new file mode 100644 index 00000000..831913e6 --- /dev/null +++ b/tests/integration_test_multi_modal_tool_returns.py @@ -0,0 +1,408 @@ 
+""" +Integration tests for multi-modal tool returns (images in tool responses). + +These tests verify that: +1. Models supporting images in tool returns can see and describe image content +2. Models NOT supporting images (e.g., Chat Completions API) receive placeholder text +3. The image data is properly passed through the approval flow + +The test uses a secret.png image containing hidden text that the model must identify. +""" + +import base64 +import os +import uuid + +import pytest +from letta_client import Letta +from letta_client.types.agents import ApprovalRequestMessage, AssistantMessage, ToolCallMessage + +# ------------------------------ +# Constants +# ------------------------------ + +# The secret text embedded in the test image +# This is the actual text visible in secret.png +SECRET_TEXT_IN_IMAGE = "FIREBRAWL" + +# Models that support images in tool returns (Responses API, Anthropic, or Google AI) +MODELS_WITH_IMAGE_SUPPORT = [ + "anthropic/claude-sonnet-4-5-20250929", + "openai/gpt-5", # Uses Responses API + "google_ai/gemini-2.5-flash", # Google AI with vision support +] + +# Models that do NOT support images in tool returns (Chat Completions only) +MODELS_WITHOUT_IMAGE_SUPPORT = [ + "openai/gpt-4o-mini", # Uses Chat Completions API, not Responses +] + + +def _load_secret_image() -> str: + """Loads the secret test image and returns it as base64.""" + image_path = os.path.join(os.path.dirname(__file__), "data/secret.png") + with open(image_path, "rb") as f: + return base64.standard_b64encode(f.read()).decode("utf-8") + + +SECRET_IMAGE_BASE64 = _load_secret_image() + + +def get_image_tool_schema(): + """Returns a client-side tool schema that returns an image.""" + return { + "name": "get_secret_image", + "description": "Retrieves a secret image with hidden text. 
Call this function to get the image.", + "parameters": { + "type": "object", + "properties": {}, + "required": [], + }, + } + + +# ------------------------------ +# Fixtures +# ------------------------------ + + +@pytest.fixture +def client(server_url: str) -> Letta: + """Create a Letta client.""" + return Letta(base_url=server_url) + + +# ------------------------------ +# Test Cases +# ------------------------------ + + +class TestMultiModalToolReturns: + """Test multi-modal (image) content in tool returns.""" + + @pytest.mark.parametrize("model", MODELS_WITH_IMAGE_SUPPORT) + def test_model_can_see_image_in_tool_return(self, client: Letta, model: str) -> None: + """ + Test that models supporting images can see and describe image content + returned from a tool. + + Flow: + 1. User asks agent to get the secret image and tell them what's in it + 2. Agent calls client-side tool, execution pauses + 3. Client provides tool return with image content + 4. Agent processes the image and describes what it sees + 5. 
Verify the agent mentions the secret text from the image + """ + # Create agent for this test + agent = client.agents.create( + name=f"multimodal_test_{uuid.uuid4().hex[:8]}", + model=model, + embedding="openai/text-embedding-3-small", + include_base_tools=False, + tool_ids=[], + include_base_tool_rules=False, + tool_rules=[], + ) + + try: + tool_schema = get_image_tool_schema() + print(f"\n=== Testing image support with model: {model} ===") + + # Step 1: User asks for the secret image + print("\nStep 1: Asking agent to call get_secret_image tool...") + response1 = client.agents.messages.create( + agent_id=agent.id, + messages=[ + { + "role": "user", + "content": "Call the get_secret_image function now.", + } + ], + client_tools=[tool_schema], + ) + + # Validate Step 1: Should pause with approval request + assert response1.stop_reason.stop_reason == "requires_approval", f"Expected requires_approval, got {response1.stop_reason}" + + # Find the approval request with tool call + approval_msg = None + for msg in response1.messages: + if isinstance(msg, ApprovalRequestMessage): + approval_msg = msg + break + + assert approval_msg is not None, f"Expected an ApprovalRequestMessage but got {[type(m).__name__ for m in response1.messages]}" + assert approval_msg.tool_call.name == "get_secret_image" + + print(f"Tool call ID: {approval_msg.tool_call.tool_call_id}") + + # Step 2: Provide tool return with image content + print("\nStep 2: Providing tool return with image...") + + # Build image content as list of content parts + image_content = [ + {"type": "text", "text": "Here is the secret image:"}, + { + "type": "image", + "source": { + "type": "base64", + "data": SECRET_IMAGE_BASE64, + "media_type": "image/png", + }, + }, + ] + + response2 = client.agents.messages.create( + agent_id=agent.id, + messages=[ + { + "type": "approval", + "approvals": [ + { + "type": "tool", + "tool_call_id": approval_msg.tool_call.tool_call_id, + "tool_return": image_content, + "status": 
"success", + }, + ], + }, + ], + ) + + # Validate Step 2: Agent should process the image and respond + print(f"Stop reason: {response2.stop_reason}") + print(f"Messages: {len(response2.messages)}") + + # Find the assistant message with the response + assistant_response = None + for msg in response2.messages: + if isinstance(msg, AssistantMessage): + assistant_response = msg.content + print(f"Assistant response: {assistant_response[:200]}...") + break + + assert assistant_response is not None, "Expected an AssistantMessage with the image description" + + # Verify the model saw the secret text in the image + # The model should mention the secret code if it can see the image + assert SECRET_TEXT_IN_IMAGE in assistant_response.upper() or SECRET_TEXT_IN_IMAGE.lower() in assistant_response.lower(), ( + f"Model should have seen the secret text '{SECRET_TEXT_IN_IMAGE}' in the image, but response was: {assistant_response}" + ) + + print("\nSUCCESS: Model correctly identified secret text in image!") + + finally: + # Cleanup + client.agents.delete(agent_id=agent.id) + + @pytest.mark.parametrize("model", MODELS_WITHOUT_IMAGE_SUPPORT) + def test_model_without_image_support_gets_placeholder(self, client: Letta, model: str) -> None: + """ + Test that models NOT supporting images receive placeholder text + and cannot see the actual image content. + + This verifies that Chat Completions API models (which don't support + images in tool results) get a graceful fallback. + + Flow: + 1. User asks agent to get the secret image + 2. Agent calls client-side tool, execution pauses + 3. Client provides tool return with image content + 4. Agent processes but CANNOT see the image (only placeholder text) + 5. 
Verify the agent does NOT mention the secret text + """ + # Create agent for this test + agent = client.agents.create( + name=f"no_image_test_{uuid.uuid4().hex[:8]}", + model=model, + embedding="openai/text-embedding-3-small", + include_base_tools=False, + tool_ids=[], + include_base_tool_rules=False, + tool_rules=[], + ) + + try: + tool_schema = get_image_tool_schema() + print(f"\n=== Testing placeholder for model without image support: {model} ===") + + # Step 1: User asks for the secret image + print("\nStep 1: Asking agent to call get_secret_image tool...") + response1 = client.agents.messages.create( + agent_id=agent.id, + messages=[ + { + "role": "user", + "content": "Call the get_secret_image function now.", + } + ], + client_tools=[tool_schema], + ) + + # Validate Step 1: Should pause with approval request + assert response1.stop_reason.stop_reason == "requires_approval", f"Expected requires_approval, got {response1.stop_reason}" + + # Find the approval request with tool call + approval_msg = None + for msg in response1.messages: + if isinstance(msg, ApprovalRequestMessage): + approval_msg = msg + break + + assert approval_msg is not None, f"Expected an ApprovalRequestMessage but got {[type(m).__name__ for m in response1.messages]}" + + # Step 2: Provide tool return with image content + print("\nStep 2: Providing tool return with image...") + + image_content = [ + {"type": "text", "text": "Here is the secret image:"}, + { + "type": "image", + "source": { + "type": "base64", + "data": SECRET_IMAGE_BASE64, + "media_type": "image/png", + }, + }, + ] + + response2 = client.agents.messages.create( + agent_id=agent.id, + messages=[ + { + "type": "approval", + "approvals": [ + { + "type": "tool", + "tool_call_id": approval_msg.tool_call.tool_call_id, + "tool_return": image_content, + "status": "success", + }, + ], + }, + ], + ) + + # Find the assistant message + assistant_response = None + for msg in response2.messages: + if isinstance(msg, AssistantMessage): + 
assistant_response = msg.content + print(f"Assistant response: {assistant_response[:200]}...") + break + + assert assistant_response is not None, "Expected an AssistantMessage" + + # Verify the model did NOT see the secret text (it got placeholder instead) + assert ( + SECRET_TEXT_IN_IMAGE not in assistant_response.upper() and SECRET_TEXT_IN_IMAGE.lower() not in assistant_response.lower() + ), ( + f"Model should NOT have seen the secret text '{SECRET_TEXT_IN_IMAGE}' (it doesn't support images), " + f"but response was: {assistant_response}" + ) + + # The model should mention something about image being omitted/not visible + response_lower = assistant_response.lower() + mentions_image_issue = any( + phrase in response_lower + for phrase in ["image", "omitted", "cannot see", "can't see", "unable to", "not able to", "no image"] + ) + + print("\nSUCCESS: Model correctly did not see the secret (image support not available)") + if mentions_image_issue: + print("Model acknowledged it cannot see the image content") + + finally: + # Cleanup + client.agents.delete(agent_id=agent.id) + + +class TestMultiModalToolReturnsSerialization: + """Test that multi-modal tool returns serialize/deserialize correctly.""" + + @pytest.mark.parametrize("model", MODELS_WITH_IMAGE_SUPPORT[:1]) # Just test one model + def test_tool_return_with_image_persists_in_db(self, client: Letta, model: str) -> None: + """ + Test that tool returns with images are correctly persisted and + can be retrieved from the database. 
+ """ + agent = client.agents.create( + name=f"persist_test_{uuid.uuid4().hex[:8]}", + model=model, + embedding="openai/text-embedding-3-small", + include_base_tools=False, + tool_ids=[], + include_base_tool_rules=False, + tool_rules=[], + ) + + try: + tool_schema = get_image_tool_schema() + + # Trigger tool call + response1 = client.agents.messages.create( + agent_id=agent.id, + messages=[{"role": "user", "content": "Call the get_secret_image tool."}], + client_tools=[tool_schema], + ) + + assert response1.stop_reason.stop_reason == "requires_approval" + + approval_msg = None + for msg in response1.messages: + if isinstance(msg, ApprovalRequestMessage): + approval_msg = msg + break + + assert approval_msg is not None + + # Provide image tool return + image_content = [ + {"type": "text", "text": "Image result"}, + { + "type": "image", + "source": { + "type": "base64", + "data": SECRET_IMAGE_BASE64, + "media_type": "image/png", + }, + }, + ] + + response2 = client.agents.messages.create( + agent_id=agent.id, + messages=[ + { + "type": "approval", + "approvals": [ + { + "type": "tool", + "tool_call_id": approval_msg.tool_call.tool_call_id, + "tool_return": image_content, + "status": "success", + }, + ], + }, + ], + ) + + # Verify we got a response + assert response2.stop_reason is not None + + # Retrieve messages from DB and verify they persisted + messages_from_db = client.agents.messages.list(agent_id=agent.id) + + # Look for the tool return message in the persisted messages + found_tool_return = False + for msg in messages_from_db.items: + # Check if this is a tool return message that might contain our image + if hasattr(msg, "tool_returns") and msg.tool_returns: + found_tool_return = True + break + + # The tool return should have been saved + print(f"Found {len(messages_from_db.items)} messages in DB") + print(f"Tool return message found: {found_tool_return}") + + finally: + client.agents.delete(agent_id=agent.id) diff --git 
a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py index c329180a..4c1d71b4 100644 --- a/tests/integration_test_send_message.py +++ b/tests/integration_test_send_message.py @@ -189,6 +189,7 @@ all_configs = [ "openai-gpt-5.json", # TODO: GPT-5 disabled for now, it sends HiddenReasoningMessages which break the tests. "claude-4-5-sonnet.json", "gemini-2.5-pro.json", + "minimax-m2.1-lightning.json", ] reasoning_configs = [ @@ -243,6 +244,10 @@ def is_reasoner_model(model_handle: str, model_settings: dict) -> bool: elif provider_type in ["google_vertex", "google_ai"]: return model.startswith("gemini-2.5-flash") or model.startswith("gemini-2.5-pro") or model.startswith("gemini-3") + # MiniMax reasoning models (all M2.x models support native interleaved thinking) + elif provider_type == "minimax": + return model.startswith("MiniMax-M2") + return False diff --git a/tests/integration_test_send_message_v2.py b/tests/integration_test_send_message_v2.py index 124b1a5b..ce267e7d 100644 --- a/tests/integration_test_send_message_v2.py +++ b/tests/integration_test_send_message_v2.py @@ -1017,6 +1017,65 @@ async def test_conversation_streaming_raw_http( assert "assistant_message" in message_types, f"Expected assistant_message in {message_types}" +@pytest.mark.parametrize( + "model_config", + TESTED_MODEL_CONFIGS, + ids=[handle for handle, _ in TESTED_MODEL_CONFIGS], +) +@pytest.mark.asyncio(loop_scope="function") +async def test_conversation_non_streaming_raw_http( + disable_e2b_api_key: Any, + client: AsyncLetta, + server_url: str, + agent_state: AgentState, + model_config: Tuple[str, dict], +) -> None: + """ + Test conversation-based non-streaming functionality using raw HTTP requests. + + This test verifies that: + 1. A conversation can be created for an agent + 2. Messages can be sent to the conversation without streaming (streaming=False) + 3. 
The JSON response contains the expected message types + """ + import httpx + + model_handle, model_settings = model_config + agent_state = await client.agents.update(agent_id=agent_state.id, model=model_handle, model_settings=model_settings) + + async with httpx.AsyncClient(base_url=server_url, timeout=60.0) as http_client: + # Create a conversation for the agent + create_response = await http_client.post( + "/v1/conversations/", + params={"agent_id": agent_state.id}, + json={}, + ) + assert create_response.status_code == 200, f"Failed to create conversation: {create_response.text}" + conversation = create_response.json() + assert conversation["id"] is not None + assert conversation["agent_id"] == agent_state.id + + # Send a message to the conversation using NON-streaming mode + response = await http_client.post( + f"/v1/conversations/{conversation['id']}/messages", + json={ + "messages": [{"role": "user", "content": f"Reply with the message '{USER_MESSAGE_RESPONSE}'."}], + "streaming": False, # Non-streaming mode + }, + ) + assert response.status_code == 200, f"Failed to send message: {response.text}" + + # Parse JSON response (LettaResponse) + result = response.json() + assert "messages" in result, f"Expected 'messages' in response: {result}" + messages = result["messages"] + + # Verify the response contains expected message types + assert len(messages) > 0, "Expected at least one message in response" + message_types = [msg.get("message_type") for msg in messages] + assert "assistant_message" in message_types, f"Expected assistant_message in {message_types}" + + @pytest.mark.parametrize( "model_handle,provider_type", [ diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index 39998108..fec63a4d 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -816,7 +816,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv captured_llm_config: dict = {} - async def 
fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None): # type: ignore[override] + async def fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None, **kwargs): # type: ignore[override] captured_llm_config["value"] = llm_config return "summary text" diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index fb9c134f..4a9e8598 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -406,8 +406,14 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat tool_rules=None, ) - # Use the static helper on LettaAgentV3 to derive summarizer llm_config - summarizer_llm_config = LettaAgentV3._build_summarizer_llm_config( + # Create a mock agent instance to call the instance method + mock_agent = Mock(spec=LettaAgentV3) + mock_agent.actor = default_user + mock_agent.logger = Mock() + + # Use the instance method to derive summarizer llm_config + summarizer_llm_config = await LettaAgentV3._build_summarizer_llm_config( + mock_agent, agent_llm_config=agent_state.llm_config, summarizer_config=agent_state.compaction_settings, ) diff --git a/tests/managers/test_passage_manager.py b/tests/managers/test_passage_manager.py index 5adb69a4..e5020dea 100644 --- a/tests/managers/test_passage_manager.py +++ b/tests/managers/test_passage_manager.py @@ -222,6 +222,52 @@ async def test_agent_list_passages_filtering(server, default_user, sarah_agent, assert len(date_filtered) == 5 +@pytest.mark.asyncio +async def test_agent_query_passages_time_only(server, default_user, default_archive, disable_turbopuffer): + """Test querying passages with date filters and no query text.""" + now = datetime.now(timezone.utc) + older_date = now - timedelta(days=2) + newer_date = now - timedelta(hours=2) + + older_passage = await server.passage_manager.create_agent_passage_async( + PydanticPassage( + organization_id=default_user.organization_id, + 
archive_id=default_archive.id, + text="Older passage", + embedding=[0.1], + embedding_config=DEFAULT_EMBEDDING_CONFIG, + created_at=older_date, + ), + actor=default_user, + ) + + newer_passage = await server.passage_manager.create_agent_passage_async( + PydanticPassage( + organization_id=default_user.organization_id, + archive_id=default_archive.id, + text="Newer passage", + embedding=[0.1], + embedding_config=DEFAULT_EMBEDDING_CONFIG, + created_at=newer_date, + ), + actor=default_user, + ) + + results = await server.agent_manager.query_agent_passages_async( + actor=default_user, + archive_id=default_archive.id, + start_date=now - timedelta(days=1), + end_date=now + timedelta(minutes=1), + ) + + assert len(results) == 1 + passage, _, _ = results[0] + assert passage.id == newer_passage.id + assert passage.id != older_passage.id + assert passage.created_at >= now - timedelta(days=1) + assert passage.created_at <= now + timedelta(minutes=1) + + @pytest.fixture def mock_embeddings(): """Load mock embeddings from JSON file""" diff --git a/tests/managers/test_provider_manager.py b/tests/managers/test_provider_manager.py index 70c0f418..fb135be8 100644 --- a/tests/managers/test_provider_manager.py +++ b/tests/managers/test_provider_manager.py @@ -598,6 +598,7 @@ async def test_server_startup_syncs_base_providers(default_user, default_organiz monkeypatch.delenv("GROQ_API_KEY", raising=False) monkeypatch.delenv("TOGETHER_API_KEY", raising=False) monkeypatch.delenv("VLLM_API_BASE", raising=False) + monkeypatch.delenv("SGLANG_API_BASE", raising=False) monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False) monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) monkeypatch.delenv("LMSTUDIO_BASE_URL", raising=False) @@ -622,6 +623,7 @@ async def test_server_startup_syncs_base_providers(default_user, default_organiz monkeypatch.setattr(model_settings, "groq_api_key", None) monkeypatch.setattr(model_settings, "together_api_key", None) monkeypatch.setattr(model_settings, 
"vllm_api_base", None) + monkeypatch.setattr(model_settings, "sglang_api_base", None) monkeypatch.setattr(model_settings, "aws_access_key_id", None) monkeypatch.setattr(model_settings, "aws_secret_access_key", None) monkeypatch.setattr(model_settings, "lmstudio_base_url", None) diff --git a/tests/mcp_tests/test_mcp.py b/tests/mcp_tests/test_mcp.py index f63882a4..9930a0cd 100644 --- a/tests/mcp_tests/test_mcp.py +++ b/tests/mcp_tests/test_mcp.py @@ -115,7 +115,8 @@ def server_url(empty_mcp_config): if not os.getenv("LETTA_SERVER_URL"): thread = threading.Thread(target=_run_server, daemon=True) thread.start() - wait_for_server(url) + # Use 60s timeout to allow for provider model syncing during server startup + wait_for_server(url, timeout=60) return url diff --git a/tests/model_settings/minimax-m2.1-lightning.json b/tests/model_settings/minimax-m2.1-lightning.json new file mode 100644 index 00000000..7a7f03bd --- /dev/null +++ b/tests/model_settings/minimax-m2.1-lightning.json @@ -0,0 +1,9 @@ +{ + "handle": "minimax/MiniMax-M2.1-lightning", + "model_settings": { + "provider_type": "minimax", + "temperature": 1.0, + "max_output_tokens": 4096, + "parallel_tool_calls": false + } +} diff --git a/tests/test_agent_serialization_v2.py b/tests/test_agent_serialization_v2.py index 26b2d966..8bc6f21b 100644 --- a/tests/test_agent_serialization_v2.py +++ b/tests/test_agent_serialization_v2.py @@ -235,6 +235,15 @@ def embedding_handle_override(): return "openai/text-embedding-ada-002" +@pytest.fixture(scope="function") +def model_handle_override(): + # Use a different OpenAI model handle for override tests. + # The default in tests is usually gpt-4o-mini, so we use gpt-4o. 
+ current_handle = LLMConfig.default_config("gpt-4o-mini").handle or "openai/gpt-4o-mini" + assert current_handle != "openai/gpt-4o" # make sure it's different + return "openai/gpt-4o" + + @pytest.fixture(scope="function") async def test_source(server: SyncServer, default_user): """Fixture to create and return a test source.""" @@ -1166,6 +1175,52 @@ class TestAgentFileImport: imported_agent = await server.agent_manager.get_agent_by_id_async(imported_agent_id, other_user) assert imported_agent.embedding_config.handle == embedding_handle_override + async def test_basic_import_with_model_override( + self, server, agent_serialization_manager, test_agent, default_user, other_user, model_handle_override + ): + """Test basic agent import functionality with LLM model override.""" + # Verify original agent has gpt-4o-mini (handle may be None for legacy configs) + assert "gpt-4o-mini" in (test_agent.llm_config.handle or "") or "gpt-4o-mini" in (test_agent.llm_config.model or "") + + agent_file = await agent_serialization_manager.export([test_agent.id], default_user) + + llm_config_override = await server.get_llm_config_from_handle_async(actor=other_user, handle=model_handle_override) + result = await agent_serialization_manager.import_file(agent_file, other_user, override_llm_config=llm_config_override) + + assert result.success + assert result.imported_count > 0 + assert len(result.id_mappings) > 0 + + for file_id, db_id in result.id_mappings.items(): + if file_id.startswith("agent-"): + assert db_id != test_agent.id # New agent should have different ID + + # check model handle was overridden + imported_agent_id = next(db_id for file_id, db_id in result.id_mappings.items() if file_id == "agent-0") + imported_agent = await server.agent_manager.get_agent_by_id_async(imported_agent_id, other_user) + assert imported_agent.llm_config.handle == model_handle_override + + async def test_basic_import_with_both_overrides( + self, server, agent_serialization_manager, test_agent, 
default_user, other_user, embedding_handle_override, model_handle_override + ): + """Test agent import with both embedding and model overrides.""" + agent_file = await agent_serialization_manager.export([test_agent.id], default_user) + + embedding_config_override = await server.get_embedding_config_from_handle_async(actor=other_user, handle=embedding_handle_override) + llm_config_override = await server.get_llm_config_from_handle_async(actor=other_user, handle=model_handle_override) + result = await agent_serialization_manager.import_file( + agent_file, other_user, override_embedding_config=embedding_config_override, override_llm_config=llm_config_override + ) + + assert result.success + assert result.imported_count > 0 + + # Verify both overrides were applied + imported_agent_id = next(db_id for file_id, db_id in result.id_mappings.items() if file_id == "agent-0") + imported_agent = await server.agent_manager.get_agent_by_id_async(imported_agent_id, other_user) + assert imported_agent.embedding_config.handle == embedding_handle_override + assert imported_agent.llm_config.handle == model_handle_override + async def test_import_preserves_data(self, server, agent_serialization_manager, test_agent, default_user, other_user): """Test that import preserves all important data.""" agent_file = await agent_serialization_manager.export([test_agent.id], default_user) diff --git a/tests/test_client.py b/tests/test_client.py index 55ceeb0b..7c3b0d08 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -300,10 +300,12 @@ def test_agent_tags(client: Letta, clear_tables): # Test getting all tags all_tags = client.tags.list() + # Filter out dynamic favorite:user tags since they contain user-specific UUIDs + all_tags_filtered = [tag for tag in all_tags if not tag.startswith("favorite:user:")] expected_tags = ["agent1", "agent2", "agent3", "development", "origin:letta-chat", "production", "test", "view:letta-chat"] print("ALL TAGS", all_tags) print("EXPECTED TAGS", 
# Constants
SERVER_PORT = 8283


def run_server():
    """Launch the Letta REST server in-process (target for the client fixture's thread)."""
    load_dotenv()
    from letta.server.rest_api.app import start_server

    print("Starting server...")
    start_server(debug=True)


@pytest.fixture(scope="module")
def client() -> LettaSDKClient:
    """Return a client against LETTA_SERVER_URL, starting a local server if unset."""
    server_url = os.getenv("LETTA_SERVER_URL", f"http://localhost:{SERVER_PORT}")
    if not os.getenv("LETTA_SERVER_URL"):
        print("Starting server thread")
        server_thread = threading.Thread(target=run_server, daemon=True)
        server_thread.start()
        wait_for_server(server_url, timeout=60)

    print("Running embedding-optional tests with server:", server_url)
    yield LettaSDKClient(base_url=server_url)


@pytest.fixture(scope="function")
def agent_without_embedding(client: LettaSDKClient):
    """Yield an agent created WITHOUT an embedding model; delete it afterwards."""
    state = client.agents.create(
        memory_blocks=[
            CreateBlockParam(label="human", value="username: test_user"),
        ],
        model="openai/gpt-4o-mini",
        # NOTE: the `embedding` parameter is intentionally omitted to exercise
        # the embedding-optional code path.
    )

    assert state.embedding_config is None, "Agent should have no embedding config"

    yield state

    # Cleanup
    client.agents.delete(agent_id=state.id)


@pytest.fixture(scope="function")
def agent_with_embedding(client: LettaSDKClient):
    """Yield an agent created WITH an embedding model, for comparison tests."""
    state = client.agents.create(
        memory_blocks=[
            CreateBlockParam(label="human", value="username: test_user_with_embedding"),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
    )

    assert state.embedding_config is not None, "Agent should have embedding config"

    yield state

    # Cleanup
    client.agents.delete(agent_id=state.id)


class TestAgentCreationWithoutEmbedding:
    """Tests for agent creation without embedding configuration."""

    def test_create_agent_without_embedding(self, client: LettaSDKClient):
        """An agent can be created with no embedding model at all."""
        state = client.agents.create(
            memory_blocks=[CreateBlockParam(label="human", value="test user")],
            model="openai/gpt-4o-mini",
        )
        try:
            assert state.id is not None
            assert state.id.startswith("agent-")
            assert state.embedding_config is None
            assert state.llm_config is not None
        finally:
            client.agents.delete(agent_id=state.id)

    def test_agent_with_and_without_embedding_coexist(self, agent_without_embedding, agent_with_embedding):
        """Agents with and without embedding configs can exist side by side."""
        assert agent_without_embedding.id != agent_with_embedding.id
        assert agent_without_embedding.embedding_config is None
        assert agent_with_embedding.embedding_config is not None
+ """Test inserting a passage into an agent without embedding config.""" + agent_id = agent_without_embedding.id + + # Insert a passage - use deprecated API but suppress warning + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + passages = client.agents.passages.create( + agent_id=agent_id, + text="This is a test passage about Python programming.", + ) + + # Should return a list with one passage + assert len(passages) == 1 + passage = passages[0] + + assert passage.id is not None + assert passage.text == "This is a test passage about Python programming." + # Embedding should be None for agents without embedding config + assert passage.embedding is None + assert passage.embedding_config is None + + def test_insert_multiple_passages_without_embedding(self, client: LettaSDKClient, agent_without_embedding): + """Test inserting multiple passages into an agent without embedding.""" + agent_id = agent_without_embedding.id + + test_passages = [ + "Machine learning is a subset of artificial intelligence.", + "Python is widely used for data science applications.", + "Neural networks can learn complex patterns from data.", + ] + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + for text in test_passages: + passages = client.agents.passages.create( + agent_id=agent_id, + text=text, + ) + assert len(passages) == 1 + assert passages[0].embedding is None + + # Verify all passages were inserted + all_passages = client.agents.passages.list(agent_id=agent_id) + + assert len(all_passages) >= 3 + + def test_insert_passage_with_tags_without_embedding(self, client: LettaSDKClient, agent_without_embedding): + """Test inserting a passage with tags into an agent without embedding.""" + agent_id = agent_without_embedding.id + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + passages = client.agents.passages.create( + agent_id=agent_id, + text="Important fact: The sky is 
blue due to Rayleigh scattering.", + tags=["science", "physics", "important"], + ) + + assert len(passages) == 1 + passage = passages[0] + assert passage.embedding is None + assert passage.tags is not None + assert set(passage.tags) == {"science", "physics", "important"} + + +class TestArchivalMemoryListWithoutEmbedding: + """Tests for listing archival memory without embeddings.""" + + def test_list_passages_without_embedding(self, client: LettaSDKClient, agent_without_embedding): + """Test listing passages from an agent without embedding.""" + agent_id = agent_without_embedding.id + + # Insert some passages first + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + client.agents.passages.create( + agent_id=agent_id, + text="First test passage", + ) + client.agents.passages.create( + agent_id=agent_id, + text="Second test passage", + ) + + # List passages + passages = client.agents.passages.list(agent_id=agent_id) + + assert len(passages) >= 2 + + for passage in passages: + # Verify embeddings are None + assert passage.embedding is None + + def test_list_passages_with_search_filter(self, client: LettaSDKClient, agent_without_embedding): + """Test listing passages with text search filter.""" + agent_id = agent_without_embedding.id + + # Insert passages with distinctive content + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + client.agents.passages.create( + agent_id=agent_id, + text="Apple is a fruit that grows on trees.", + ) + client.agents.passages.create( + agent_id=agent_id, + text="Python is a programming language.", + ) + + # Search for passages containing "fruit" + passages = client.agents.passages.list( + agent_id=agent_id, + search="fruit", + ) + + # Should find the apple passage + assert len(passages) >= 1 + assert any("fruit" in p.text.lower() for p in passages) + + +class TestArchivalMemorySearchWithoutEmbedding: + """Tests for searching archival memory without embeddings 
class TestArchivalMemorySearchWithoutEmbedding:
    """Tests for searching archival memory without embeddings (text-based search)."""

    def test_search_passages_without_embedding(self, client: LettaSDKClient, agent_without_embedding):
        """Text search works against an agent that has no embedding config."""
        agent_id = agent_without_embedding.id

        # Insert test passages (deprecated endpoint; suppress its warning).
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            client.agents.passages.create(agent_id=agent_id, text="The capital of France is Paris.")
            client.agents.passages.create(agent_id=agent_id, text="Tokyo is the capital of Japan.")
            client.agents.passages.create(agent_id=agent_id, text="Python is a popular programming language.")

        results = client.agents.passages.search(agent_id=agent_id, query="capital")

        assert results is not None
        # The response shape varies across SDK versions. When a `.results`
        # wrapper exists, the two "capital" passages should be discoverable.
        if hasattr(results, "results"):
            assert len(results.results) >= 1
        # NOTE(review): the previous `assert len(results) >= 0` fallback was a
        # tautology (len() can never be negative) and has been removed; text
        # search may legitimately return zero matches on some backends.

    def test_global_passage_search_without_embedding(self, client: LettaSDKClient, agent_without_embedding):
        """The global passage-search endpoint works for an embedding-less agent."""
        agent_id = agent_without_embedding.id

        # Insert a passage with a distinctive token to search for.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            client.agents.passages.create(
                agent_id=agent_id,
                text="Unique test content for global search testing xyz123.",
            )

        # Use global passage search; should resolve via text search.
        results = client.passages.search(query="xyz123", agent_id=agent_id)
        assert results is not None


class TestArchivalMemoryDeleteWithoutEmbedding:
    """Tests for deleting archival memory without embeddings."""

    def test_delete_passage_without_embedding(self, client: LettaSDKClient, agent_without_embedding):
        """A passage inserted without an embedding can be deleted normally."""
        agent_id = agent_without_embedding.id

        # Insert a passage, then delete it by id.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            created = client.agents.passages.create(agent_id=agent_id, text="Passage to be deleted")

        target_id = created[0].id

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            client.agents.passages.delete(agent_id=agent_id, memory_id=target_id)

        # The deleted passage must no longer appear in the listing.
        remaining = client.agents.passages.list(agent_id=agent_id)
        assert all(entry.id != target_id for entry in remaining)
class TestComparisonWithAndWithoutEmbedding:
    """Compare behavior between agents with and without embedding config."""

    def test_passage_insert_comparison(
        self,
        client: LettaSDKClient,
        agent_without_embedding,
        agent_with_embedding,
    ):
        """Identical content inserts into both agents; only embedding presence differs."""
        test_text = "Comparison test: This is identical content for both agents."

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            no_embed = client.agents.passages.create(agent_id=agent_without_embedding.id, text=test_text)
            with_embed = client.agents.passages.create(agent_id=agent_with_embedding.id, text=test_text)

        # Both inserts succeed with one passage each.
        assert len(no_embed) == 1
        assert len(with_embed) == 1

        # Stored text is identical...
        assert no_embed[0].text == with_embed[0].text
        # ...but only the embedding-enabled agent produces a vector.
        assert no_embed[0].embedding is None
        assert with_embed[0].embedding is not None

    def test_list_passages_comparison(
        self,
        client: LettaSDKClient,
        agent_without_embedding,
        agent_with_embedding,
    ):
        """Listing works the same regardless of embedding configuration."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            client.agents.passages.create(
                agent_id=agent_without_embedding.id,
                text="Test passage for listing comparison",
            )
            client.agents.passages.create(
                agent_id=agent_with_embedding.id,
                text="Test passage for listing comparison",
            )

        no_embed_listed = client.agents.passages.list(agent_id=agent_without_embedding.id)
        with_embed_listed = client.agents.passages.list(agent_id=agent_with_embedding.id)

        assert len(no_embed_listed) >= 1
        assert len(with_embed_listed) >= 1


class TestEdgeCases:
    """Edge cases and error handling for embedding-optional feature."""

    def test_empty_archival_memory_search(self, client: LettaSDKClient, agent_without_embedding):
        """Searching an empty archival memory returns cleanly instead of raising."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            results = client.agents.passages.search(
                agent_id=agent_without_embedding.id,
                query="anything",
            )

        # Empty result is fine; an exception is not.
        assert results is not None

    def test_passage_with_special_characters(self, client: LettaSDKClient, agent_without_embedding):
        """Non-ASCII text, emoji, and punctuation round-trip unchanged."""
        special_text = "Special chars: @#$%^&*() 日本語 émojis 🎉 "

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            created = client.agents.passages.create(
                agent_id=agent_without_embedding.id,
                text=special_text,
            )

        assert len(created) == 1
        assert created[0].text == special_text
        assert created[0].embedding is None

    def test_very_long_passage(self, client: LettaSDKClient, agent_without_embedding):
        """A ~10KB passage inserts successfully (possibly chunked) with no embedding."""
        long_text = "This is a test. " * 1000

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            created = client.agents.passages.create(
                agent_id=agent_without_embedding.id,
                text=long_text,
            )

        assert len(created) >= 1  # Might be chunked
        # First passage should have no embedding
        assert created[0].embedding is None
""" - import anthropic - with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: mock_client = AsyncMock() mock_count_tokens = AsyncMock() - # Create a mock return value with input_tokens attribute mock_response = AsyncMock() mock_response.input_tokens = 100 mock_count_tokens.return_value = mock_response @@ -198,3 +195,65 @@ async def test_count_tokens_with_empty_messages(anthropic_client, llm_config): call_args = mock_count_tokens.call_args[1] assert call_args["messages"][0]["content"] == "." assert call_args["messages"][1]["content"] == "response" + + +@pytest.mark.asyncio +async def test_count_tokens_strips_trailing_whitespace_from_final_assistant(anthropic_client, llm_config): + """ + Test that count_tokens strips trailing whitespace from the final assistant message. + Anthropic API rejects: "messages: final assistant content cannot end with trailing whitespace" + """ + with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: + mock_client = AsyncMock() + mock_count_tokens = AsyncMock() + + mock_response = AsyncMock() + mock_response.input_tokens = 100 + mock_count_tokens.return_value = mock_response + + mock_client.beta.messages.count_tokens = mock_count_tokens + mock_anthropic_class.return_value = mock_client + + # Test case 1: String content with trailing whitespace + messages_with_trailing_space = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response "}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing space stripped + + # Test case 2: String content with trailing newline + mock_count_tokens.reset_mock() + messages_with_trailing_newline = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response\n"}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_newline, model=llm_config.model) + + 
call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing newline stripped + + # Test case 3: List content with trailing whitespace in last text block + mock_count_tokens.reset_mock() + messages_with_trailing_space_in_block = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": [{"type": "text", "text": "response "}]}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space_in_block, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"][0]["text"] == "response" # trailing space stripped + + # Test case 4: Non-final assistant message should NOT have trailing whitespace stripped + mock_count_tokens.reset_mock() + messages_non_final = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "first response "}, + {"role": "user", "content": "followup"}, + ] + await anthropic_client.count_tokens(messages=messages_non_final, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "first response " # preserved for non-final diff --git a/tests/test_minimax_client.py b/tests/test_minimax_client.py new file mode 100644 index 00000000..32ac1d61 --- /dev/null +++ b/tests/test_minimax_client.py @@ -0,0 +1,270 @@ +"""Unit tests for MiniMax client.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from letta.llm_api.minimax_client import MINIMAX_BASE_URL, MiniMaxClient +from letta.schemas.enums import AgentType +from letta.schemas.llm_config import LLMConfig + + +class TestMiniMaxClient: + """Tests for MiniMaxClient.""" + + def setup_method(self): + """Set up test fixtures.""" + self.client = MiniMaxClient(put_inner_thoughts_first=True) + self.llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + + def 
test_is_reasoning_model_always_true(self): + """All MiniMax models support native interleaved thinking.""" + assert self.client.is_reasoning_model(self.llm_config) is True + + # Test with different models + for model_name in ["MiniMax-M2.1", "MiniMax-M2.1-lightning", "MiniMax-M2"]: + config = LLMConfig( + model=model_name, + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + assert self.client.is_reasoning_model(config) is True + + def test_requires_auto_tool_choice(self): + """MiniMax supports all tool choice modes.""" + assert self.client.requires_auto_tool_choice(self.llm_config) is False + + def test_supports_structured_output(self): + """MiniMax doesn't currently advertise structured output support.""" + assert self.client.supports_structured_output(self.llm_config) is False + + @patch("letta.llm_api.minimax_client.model_settings") + def test_get_anthropic_client_with_api_key(self, mock_settings): + """Test client creation with API key.""" + mock_settings.minimax_api_key = "test-api-key" + + with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic: + mock_anthropic.Anthropic.return_value = MagicMock() + + # Mock BYOK to return no override + self.client.get_byok_overrides = MagicMock(return_value=(None, None, None)) + + client = self.client._get_anthropic_client(self.llm_config, async_client=False) + + mock_anthropic.Anthropic.assert_called_once_with( + api_key="test-api-key", + base_url=MINIMAX_BASE_URL, + ) + + @patch("letta.llm_api.minimax_client.model_settings") + def test_get_anthropic_client_async(self, mock_settings): + """Test async client creation.""" + mock_settings.minimax_api_key = "test-api-key" + + with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic: + mock_anthropic.AsyncAnthropic.return_value = MagicMock() + + # Mock BYOK to return no override + self.client.get_byok_overrides = MagicMock(return_value=(None, None, None)) + + client = 
class TestMiniMaxClientTemperatureClamping:
    """Tests for temperature clamping in build_request_data.

    MiniMax requires temperature in (0, 1]; the client clamps out-of-range
    values to 0.01 / 1.0 and leaves in-range values untouched.
    """

    def setup_method(self):
        """Fresh client per test."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)

    def _request_temperature(self, temperature):
        """Run build_request_data with the parent class mocked to emit `temperature`,
        returning the temperature after MiniMax's clamping has been applied."""
        config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
            temperature=temperature,
        )
        # Mock only the parent implementation so MiniMaxClient's own
        # post-processing (the clamp) still runs.
        with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent:
            mock_parent.return_value = {"temperature": temperature, "model": "MiniMax-M2.1"}
            result = self.client.build_request_data(
                agent_type=AgentType.letta_v1_agent,
                messages=[],
                llm_config=config,
            )
        return result["temperature"]

    def test_boundary_temperatures_pass_through(self):
        """Exactly 0.01 and 1.0 are already legal and must not be altered.

        (Replaces an earlier vacuous test that patched build_request_data —
        the method under test — and only asserted the mock was called.)
        """
        assert self._request_temperature(0.01) == 0.01
        assert self._request_temperature(1.0) == 1.0

    def test_temperature_zero_clamped(self):
        """temperature=0 is clamped up to 0.01."""
        assert self._request_temperature(0) == 0.01

    def test_temperature_negative_clamped(self):
        """Negative temperatures are clamped up to 0.01."""
        assert self._request_temperature(-0.5) == 0.01

    def test_temperature_above_one_clamped(self):
        """temperature > 1.0 is clamped down to 1.0."""
        assert self._request_temperature(1.5) == 1.0

    def test_temperature_valid_not_modified(self):
        """An in-range temperature (0.7) is passed through unchanged."""
        assert self._request_temperature(0.7) == 0.7
class TestMiniMaxClientUsesNonBetaAPI:
    """Tests verifying the MiniMax client calls the non-beta Anthropic API surface."""

    @staticmethod
    def _make_llm_config():
        """Shared MiniMax LLMConfig for all three request-path tests."""
        return LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )

    def test_request_uses_messages_not_beta(self):
        """request() must call client.messages.create, never client.beta.messages.create."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_llm_config()

        with patch.object(client, "_get_anthropic_client") as mock_get_client:
            mock_anthropic_client = MagicMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            client.request({"model": "MiniMax-M2.1"}, llm_config)

            mock_anthropic_client.messages.create.assert_called_once()
            # NOTE(review): previously `assert not hasattr(mock, "beta") or ...`;
            # hasattr on a MagicMock is always True (the access auto-creates the
            # attribute), so that disjunct was dead. assert_not_called is the
            # direct check that the beta surface was never used.
            mock_anthropic_client.beta.messages.create.assert_not_called()

    @pytest.mark.asyncio
    async def test_request_async_uses_messages_not_beta(self):
        """request_async() must call client.messages.create, never the beta surface."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_llm_config()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            await client.request_async({"model": "MiniMax-M2.1"}, llm_config)

            mock_anthropic_client.messages.create.assert_called_once()
            mock_anthropic_client.beta.messages.create.assert_not_called()

    @pytest.mark.asyncio
    async def test_stream_async_uses_messages_not_beta(self):
        """stream_async() must call client.messages.create with stream=True."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_llm_config()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_stream = AsyncMock()
            mock_anthropic_client.messages.create.return_value = mock_stream
            mock_get_client.return_value = mock_anthropic_client

            await client.stream_async({"model": "MiniMax-M2.1"}, llm_config)

            mock_anthropic_client.messages.create.assert_called_once()
            mock_anthropic_client.beta.messages.create.assert_not_called()
            # Verify stream=True was set on the call.
            call_kwargs = mock_anthropic_client.messages.create.call_args[1]
            assert call_kwargs.get("stream") is True
def weather_tool(client, weather_tool_func): - weather_tool = client.tools.upsert_from_function(func=weather_tool_func) - yield weather_tool + tool = client.tools.upsert_from_function(func=weather_tool_func) + yield tool @pytest.fixture(scope="function") def print_tool(client, print_tool_func): - print_tool = client.tools.upsert_from_function(func=print_tool_func) - yield print_tool + tool = client.tools.upsert_from_function(func=print_tool_func) + yield tool @pytest.fixture(scope="function") def agent_state(client, roll_dice_tool, weather_tool): - """Creates an agent and ensures cleanup after tests.""" + """Creates an agent with tools and ensures cleanup after tests.""" agent_state = client.agents.create( - name=f"test_compl_{str(uuid.uuid4())[5:]}", + name=f"test_provider_trace_{str(uuid.uuid4())[:8]}", tool_ids=[roll_dice_tool.id, weather_tool.id], include_base_tools=True, + tags=["test", "provider-trace"], memory_blocks=[ - { - "label": "human", - "value": "Name: Matt", - }, - { - "label": "persona", - "value": "Friendly agent", - }, + {"label": "human", "value": "Name: TestUser"}, + {"label": "persona", "value": "Helpful test agent"}, ], llm_config=LLMConfig.default_config(model_name="gpt-4o-mini"), embedding_config=EmbeddingConfig.default_config(provider="openai"), @@ -93,34 +99,305 @@ def agent_state(client, roll_dice_tool, weather_tool): client.agents.delete(agent_state.id) -@pytest.mark.asyncio -@pytest.mark.parametrize("message", ["Get the weather in San Francisco."]) -async def test_provider_trace_experimental_step(client, message, agent_state): - response = client.agents.messages.create( - agent_id=agent_state.id, messages=[MessageCreate(role="user", content=[TextContent(text=message)])] +@pytest.fixture(scope="function") +def agent_state_with_tags(client, weather_tool): + """Creates an agent with specific tags for tag verification tests.""" + agent_state = client.agents.create( + name=f"test_tagged_agent_{str(uuid.uuid4())[:8]}", + 
tool_ids=[weather_tool.id], + include_base_tools=True, + tags=["env:test", "team:telemetry", "version:v1"], + memory_blocks=[ + {"label": "human", "value": "Name: TagTestUser"}, + {"label": "persona", "value": "Agent with tags"}, + ], + llm_config=LLMConfig.default_config(model_name="gpt-4o-mini"), + embedding_config=EmbeddingConfig.default_config(provider="openai"), ) - tool_step = response.messages[0].step_id - reply_step = response.messages[-1].step_id - - tool_telemetry = client.telemetry.retrieve_provider_trace(step_id=tool_step) - reply_telemetry = client.telemetry.retrieve_provider_trace(step_id=reply_step) - assert tool_telemetry.request_json - assert reply_telemetry.request_json + yield agent_state + client.agents.delete(agent_state.id) -@pytest.mark.asyncio -@pytest.mark.parametrize("message", ["Get the weather in San Francisco."]) -async def test_provider_trace_experimental_step_stream(client, message, agent_state): - last_message_id = client.agents.messages.list(agent_id=agent_state.id, limit=1)[0] - stream = client.agents.messages.create_stream( - agent_id=agent_state.id, messages=[MessageCreate(role="user", content=[TextContent(text=message)])] - ) +class TestProviderTraceBasicStep: + """Tests for basic agent step provider traces.""" - list(stream) + @pytest.mark.asyncio + async def test_non_streaming_step_creates_provider_trace(self, client, agent_state): + """Verify provider trace is created for non-streaming agent step.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello, how are you?")])], + ) - messages = client.agents.messages.list(agent_id=agent_state.id, after=last_message_id) - step_ids = [id for id in set((message.step_id for message in messages)) if id is not None] - for step_id in step_ids: - telemetry_data = client.telemetry.retrieve_provider_trace(step_id=step_id) - assert telemetry_data.request_json - assert telemetry_data.response_json + 
assert len(response.messages) > 0 + step_id = response.messages[-1].step_id + assert step_id is not None + + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + assert trace is not None + assert trace.request_json is not None + assert trace.response_json is not None + + @pytest.mark.asyncio + async def test_streaming_step_creates_provider_trace(self, client, agent_state): + """Verify provider trace is created for streaming agent step.""" + last_message = client.agents.messages.list(agent_id=agent_state.id, limit=1)[0] + + stream = client.agents.messages.create_stream( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Tell me a joke.")])], + ) + list(stream) + + messages = client.agents.messages.list(agent_id=agent_state.id, after=last_message.id) + step_ids = list({msg.step_id for msg in messages if msg.step_id is not None}) + + assert len(step_ids) > 0 + for step_id in step_ids: + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + assert trace is not None + assert trace.request_json is not None + assert trace.response_json is not None + + +class TestProviderTraceWithToolCalls: + """Tests for provider traces when tools are called.""" + + @pytest.mark.asyncio + async def test_tool_call_step_has_provider_trace(self, client, agent_state): + """Verify provider trace exists for steps that invoke tools.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Get the weather in San Francisco.")])], + ) + + tool_call_step_id = response.messages[0].step_id + final_step_id = response.messages[-1].step_id + + tool_trace = client.telemetry.retrieve_provider_trace(step_id=tool_call_step_id) + assert tool_trace is not None + assert tool_trace.request_json is not None + + if tool_call_step_id != final_step_id: + final_trace = client.telemetry.retrieve_provider_trace(step_id=final_step_id) + assert final_trace is not None + 
assert final_trace.request_json is not None + + @pytest.mark.asyncio + async def test_streaming_tool_call_has_provider_trace(self, client, agent_state): + """Verify provider trace exists for streaming steps with tool calls.""" + last_message = client.agents.messages.list(agent_id=agent_state.id, limit=1)[0] + + stream = client.agents.messages.create_stream( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Roll the dice for me.")])], + ) + list(stream) + + messages = client.agents.messages.list(agent_id=agent_state.id, after=last_message.id) + step_ids = list({msg.step_id for msg in messages if msg.step_id is not None}) + + assert len(step_ids) > 0 + for step_id in step_ids: + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + assert trace is not None + assert trace.request_json is not None + + +class TestProviderTraceTelemetryContext: + """Tests verifying telemetry context fields are correctly populated.""" + + @pytest.mark.asyncio + async def test_provider_trace_contains_agent_id(self, client, agent_state): + """Verify provider trace contains the correct agent_id.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.agent_id == agent_state.id + + @pytest.mark.asyncio + async def test_provider_trace_contains_agent_tags(self, client, agent_state_with_tags): + """Verify provider trace contains the agent's tags.""" + response = client.agents.messages.create( + agent_id=agent_state_with_tags.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.agent_tags is not None + 
assert set(trace.agent_tags) == {"env:test", "team:telemetry", "version:v1"} + + @pytest.mark.asyncio + async def test_provider_trace_contains_step_id(self, client, agent_state): + """Verify provider trace step_id matches the message step_id.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.step_id == step_id + + @pytest.mark.asyncio + async def test_provider_trace_contains_run_id_for_async_job(self, client, agent_state): + """Verify provider trace contains run_id when created via async job.""" + job = client.agents.messages.create_async( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + while job.status not in ["completed", "failed"]: + time.sleep(0.5) + job = client.jobs.retrieve(job.id) + + assert job.status == "completed" + + messages = client.agents.messages.list(agent_id=agent_state.id, limit=5) + step_ids = list({msg.step_id for msg in messages if msg.step_id is not None}) + + assert len(step_ids) > 0 + trace = client.telemetry.retrieve_provider_trace(step_id=step_ids[0]) + assert trace is not None + assert trace.run_id == job.id + + +class TestProviderTraceMultiStep: + """Tests for provider traces across multiple agent steps.""" + + @pytest.mark.asyncio + async def test_multi_step_conversation_has_traces_for_each_step(self, client, agent_state): + """Verify each step in a multi-step conversation has its own provider trace.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[ + MessageCreate( + role="user", + content=[TextContent(text="First, get the weather in NYC. 
Then roll the dice.")], + ) + ], + ) + + step_ids = list({msg.step_id for msg in response.messages if msg.step_id is not None}) + + assert len(step_ids) >= 1 + + for step_id in step_ids: + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + assert trace is not None, f"No trace found for step_id={step_id}" + assert trace.request_json is not None + assert trace.agent_id == agent_state.id + + @pytest.mark.asyncio + async def test_consecutive_messages_have_separate_traces(self, client, agent_state): + """Verify consecutive messages create separate traces.""" + response1 = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + step_id_1 = response1.messages[-1].step_id + + response2 = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="How are you?")])], + ) + step_id_2 = response2.messages[-1].step_id + + assert step_id_1 != step_id_2 + + trace1 = client.telemetry.retrieve_provider_trace(step_id=step_id_1) + trace2 = client.telemetry.retrieve_provider_trace(step_id=step_id_2) + + assert trace1 is not None + assert trace2 is not None + assert trace1.id != trace2.id + + +class TestProviderTraceRequestResponseContent: + """Tests verifying request and response JSON content.""" + + @pytest.mark.asyncio + async def test_request_json_contains_model(self, client, agent_state): + """Verify request_json contains model information.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.request_json is not None + assert "model" in trace.request_json + + @pytest.mark.asyncio + async def test_request_json_contains_messages(self, client, agent_state): + 
"""Verify request_json contains messages array.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.request_json is not None + assert "messages" in trace.request_json + assert isinstance(trace.request_json["messages"], list) + + @pytest.mark.asyncio + async def test_response_json_contains_usage(self, client, agent_state): + """Verify response_json contains usage statistics.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hello")])], + ) + + step_id = response.messages[-1].step_id + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + + assert trace is not None + assert trace.response_json is not None + assert "usage" in trace.response_json or "usage" in str(trace.response_json) + + +class TestProviderTraceEdgeCases: + """Tests for edge cases and error scenarios.""" + + @pytest.mark.asyncio + async def test_nonexistent_step_id_returns_none_or_empty(self, client): + """Verify querying nonexistent step_id handles gracefully.""" + fake_step_id = f"step-{uuid.uuid4()}" + + try: + trace = client.telemetry.retrieve_provider_trace(step_id=fake_step_id) + assert trace is None or trace.request_json is None + except Exception: + pass + + @pytest.mark.asyncio + async def test_empty_message_still_creates_trace(self, client, agent_state): + """Verify trace is created even for minimal messages.""" + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=[MessageCreate(role="user", content=[TextContent(text="Hi")])], + ) + + step_id = response.messages[-1].step_id + assert step_id is not None + + trace = client.telemetry.retrieve_provider_trace(step_id=step_id) + assert trace is not None diff 
--git a/tests/test_provider_trace_agents.py b/tests/test_provider_trace_agents.py new file mode 100644 index 00000000..830d776c --- /dev/null +++ b/tests/test_provider_trace_agents.py @@ -0,0 +1,408 @@ +""" +Unit tests for provider trace telemetry across agent versions and adapters. + +Tests verify that telemetry context is correctly passed through: +- Tool generation endpoint +- LettaAgent (v1), LettaAgentV2, LettaAgentV3 +- Streaming and non-streaming paths +- Different stream adapters +""" + +import uuid +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from letta.schemas.llm_config import LLMConfig + + +@pytest.fixture +def mock_llm_config(): + """Create a mock LLM config.""" + return LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8000, + ) + + +class TestToolGenerationTelemetry: + """Tests for tool generation endpoint telemetry.""" + + @pytest.mark.asyncio + async def test_generate_tool_sets_call_type(self, mock_llm_config): + """Verify generate_tool endpoint sets call_type='tool_generation'.""" + from letta.llm_api.llm_client import LLMClient + from letta.schemas.user import User + + mock_actor = User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + captured_telemetry = {} + + def capture_telemetry(**kwargs): + captured_telemetry.update(kwargs) + + with patch.object(LLMClient, "create") as mock_create: + mock_client = MagicMock() + mock_client.set_telemetry_context = capture_telemetry + mock_client.build_request_data = MagicMock(return_value={}) + mock_client.request_async_with_telemetry = AsyncMock(return_value={}) + mock_client.convert_response_to_chat_completion = AsyncMock( + return_value=MagicMock( + choices=[ + MagicMock( + message=MagicMock( + tool_calls=[ + MagicMock( + function=MagicMock( + arguments='{"raw_source_code": "def test(): pass", "sample_args_json": "{}", "pip_requirements_json": 
"{}"}' + ) + ) + ], + content=None, + ) + ) + ] + ) + ) + mock_create.return_value = mock_client + + from letta.server.rest_api.routers.v1.tools import GenerateToolInput, generate_tool_from_prompt + + mock_server = MagicMock() + mock_server.user_manager.get_actor_or_default_async = AsyncMock(return_value=mock_actor) + mock_server.get_llm_config_from_handle_async = AsyncMock(return_value=mock_llm_config) + + mock_headers = MagicMock() + mock_headers.actor_id = mock_actor.id + + request = GenerateToolInput( + prompt="Create a function that adds two numbers", + tool_name="add_numbers", + validation_errors=[], + ) + + with patch("letta.server.rest_api.routers.v1.tools.derive_openai_json_schema") as mock_schema: + mock_schema.return_value = {"name": "add_numbers", "parameters": {}} + try: + await generate_tool_from_prompt(request=request, server=mock_server, headers=mock_headers) + except Exception: + pass + + assert captured_telemetry.get("call_type") == "tool_generation" + + @pytest.mark.asyncio + async def test_generate_tool_has_no_agent_context(self, mock_llm_config): + """Verify generate_tool doesn't have agent_id since it's not agent-bound.""" + from letta.llm_api.llm_client import LLMClient + from letta.schemas.user import User + + mock_actor = User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + captured_telemetry = {} + + def capture_telemetry(**kwargs): + captured_telemetry.update(kwargs) + + with patch.object(LLMClient, "create") as mock_create: + mock_client = MagicMock() + mock_client.set_telemetry_context = capture_telemetry + mock_client.build_request_data = MagicMock(return_value={}) + mock_client.request_async_with_telemetry = AsyncMock(return_value={}) + mock_client.convert_response_to_chat_completion = AsyncMock( + return_value=MagicMock( + choices=[ + MagicMock( + message=MagicMock( + tool_calls=[ + MagicMock( + function=MagicMock( + arguments='{"raw_source_code": "def test(): pass", 
"sample_args_json": "{}", "pip_requirements_json": "{}"}' + ) + ) + ], + content=None, + ) + ) + ] + ) + ) + mock_create.return_value = mock_client + + from letta.server.rest_api.routers.v1.tools import GenerateToolInput, generate_tool_from_prompt + + mock_server = MagicMock() + mock_server.user_manager.get_actor_or_default_async = AsyncMock(return_value=mock_actor) + mock_server.get_llm_config_from_handle_async = AsyncMock(return_value=mock_llm_config) + + mock_headers = MagicMock() + mock_headers.actor_id = mock_actor.id + + request = GenerateToolInput( + prompt="Create a function", + tool_name="test_func", + validation_errors=[], + ) + + with patch("letta.server.rest_api.routers.v1.tools.derive_openai_json_schema") as mock_schema: + mock_schema.return_value = {"name": "test_func", "parameters": {}} + try: + await generate_tool_from_prompt(request=request, server=mock_server, headers=mock_headers) + except Exception: + pass + + assert captured_telemetry.get("agent_id") is None + assert captured_telemetry.get("step_id") is None + assert captured_telemetry.get("run_id") is None + + +class TestLLMClientTelemetryContext: + """Tests for LLMClient telemetry context methods.""" + + def test_llm_client_has_set_telemetry_context_method(self): + """Verify LLMClient exposes set_telemetry_context.""" + from letta.llm_api.llm_client import LLMClient + + client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + assert hasattr(client, "set_telemetry_context") + assert callable(client.set_telemetry_context) + + def test_llm_client_set_telemetry_context_accepts_all_fields(self): + """Verify set_telemetry_context accepts all telemetry fields.""" + from letta.llm_api.llm_client import LLMClient + + client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + + client.set_telemetry_context( + agent_id=f"agent-{uuid.uuid4()}", + agent_tags=["tag1", "tag2"], + run_id=f"run-{uuid.uuid4()}", + step_id=f"step-{uuid.uuid4()}", + 
call_type="summarization", + ) + + +class TestAdapterTelemetryAttributes: + """Tests for adapter telemetry attribute support.""" + + def test_base_adapter_has_telemetry_attributes(self, mock_llm_config): + """Verify base LettaLLMAdapter has telemetry attributes.""" + from letta.adapters.letta_llm_adapter import LettaLLMAdapter + from letta.llm_api.llm_client import LLMClient + + mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["test-tag"] + run_id = f"run-{uuid.uuid4()}" + + class TestAdapter(LettaLLMAdapter): + async def invoke_llm(self, *args, **kwargs): + pass + + adapter = TestAdapter( + llm_client=mock_client, + llm_config=mock_llm_config, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + ) + + assert adapter.agent_id == agent_id + assert adapter.agent_tags == agent_tags + assert adapter.run_id == run_id + + def test_request_adapter_inherits_telemetry_attributes(self, mock_llm_config): + """Verify LettaLLMRequestAdapter inherits telemetry attributes.""" + from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter + from letta.llm_api.llm_client import LLMClient + + mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["request-tag"] + run_id = f"run-{uuid.uuid4()}" + + adapter = LettaLLMRequestAdapter( + llm_client=mock_client, + llm_config=mock_llm_config, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + ) + + assert adapter.agent_id == agent_id + assert adapter.agent_tags == agent_tags + assert adapter.run_id == run_id + + def test_stream_adapter_inherits_telemetry_attributes(self, mock_llm_config): + """Verify LettaLLMStreamAdapter inherits telemetry attributes.""" + from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter + from letta.llm_api.llm_client import LLMClient + + mock_client = 
LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["stream-tag"] + run_id = f"run-{uuid.uuid4()}" + + adapter = LettaLLMStreamAdapter( + llm_client=mock_client, + llm_config=mock_llm_config, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + ) + + assert adapter.agent_id == agent_id + assert adapter.agent_tags == agent_tags + assert adapter.run_id == run_id + + def test_request_and_stream_adapters_have_consistent_interface(self, mock_llm_config): + """Verify both adapter types have the same telemetry interface.""" + from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter + from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter + from letta.llm_api.llm_client import LLMClient + + mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) + + request_adapter = LettaLLMRequestAdapter(llm_client=mock_client, llm_config=mock_llm_config) + stream_adapter = LettaLLMStreamAdapter(llm_client=mock_client, llm_config=mock_llm_config) + + for attr in ["agent_id", "agent_tags", "run_id"]: + assert hasattr(request_adapter, attr), f"LettaLLMRequestAdapter missing {attr}" + assert hasattr(stream_adapter, attr), f"LettaLLMStreamAdapter missing {attr}" + + +class TestSummarizerTelemetry: + """Tests for Summarizer class telemetry context.""" + + def test_summarizer_stores_telemetry_context(self): + """Verify Summarizer stores telemetry context from constructor.""" + from letta.schemas.user import User + from letta.services.summarizer.enums import SummarizationMode + from letta.services.summarizer.summarizer import Summarizer + + mock_actor = User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + agent_id = f"agent-{uuid.uuid4()}" + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + + summarizer = Summarizer( + mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER, + 
summarizer_agent=None, + message_buffer_limit=100, + message_buffer_min=10, + partial_evict_summarizer_percentage=0.5, + agent_manager=MagicMock(), + message_manager=MagicMock(), + actor=mock_actor, + agent_id=agent_id, + run_id=run_id, + step_id=step_id, + ) + + assert summarizer.agent_id == agent_id + assert summarizer.run_id == run_id + assert summarizer.step_id == step_id + + @pytest.mark.asyncio + async def test_summarize_method_accepts_runtime_telemetry(self): + """Verify summarize() method accepts runtime run_id/step_id.""" + from letta.schemas.enums import MessageRole + from letta.schemas.message import Message + from letta.schemas.user import User + from letta.services.summarizer.enums import SummarizationMode + from letta.services.summarizer.summarizer import Summarizer + + mock_actor = User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + agent_id = f"agent-{uuid.uuid4()}" + mock_messages = [ + Message( + id=f"message-{uuid.uuid4()}", + role=MessageRole.user, + content=[{"type": "text", "text": "Hello"}], + agent_id=agent_id, + ) + ] + + summarizer = Summarizer( + mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER, + summarizer_agent=None, + message_buffer_limit=100, + message_buffer_min=10, + partial_evict_summarizer_percentage=0.5, + agent_manager=MagicMock(), + message_manager=MagicMock(), + actor=mock_actor, + agent_id=agent_id, + ) + + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + + result = await summarizer.summarize( + in_context_messages=mock_messages, + new_letta_messages=[], + force=False, + run_id=run_id, + step_id=step_id, + ) + + assert result is not None + + +class TestAgentAdapterInstantiation: + """Tests verifying agents instantiate adapters with telemetry context.""" + + def test_agent_v2_creates_summarizer_with_agent_id(self, mock_llm_config): + """Verify LettaAgentV2 creates Summarizer with correct agent_id.""" + from letta.agents.letta_agent_v2 import 
LettaAgentV2 + from letta.schemas.agent import AgentState, AgentType + from letta.schemas.embedding_config import EmbeddingConfig + from letta.schemas.memory import Memory + from letta.schemas.user import User + + mock_actor = User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + agent_id = f"agent-{uuid.uuid4()}" + agent_state = AgentState( + id=agent_id, + name="test_agent", + agent_type=AgentType.letta_v1_agent, + llm_config=mock_llm_config, + embedding_config=EmbeddingConfig.default_config(provider="openai"), + tags=["test"], + memory=Memory(blocks=[]), + system="You are a helpful assistant.", + tools=[], + sources=[], + blocks=[], + ) + + agent = LettaAgentV2(agent_state=agent_state, actor=mock_actor) + + assert agent.summarizer.agent_id == agent_id diff --git a/tests/test_provider_trace_backends.py b/tests/test_provider_trace_backends.py index a088d368..f1051d1c 100644 --- a/tests/test_provider_trace_backends.py +++ b/tests/test_provider_trace_backends.py @@ -92,6 +92,48 @@ class TestProviderTrace: assert trace.call_type == "summarization" assert trace.run_id == "run-789" + def test_v2_protocol_fields(self): + """Test v2 protocol fields (org_id, user_id, compaction_settings, llm_config).""" + trace = ProviderTrace( + request_json={}, + response_json={}, + step_id="step-123", + org_id="org-123", + user_id="user-123", + compaction_settings={"mode": "sliding_window", "target_message_count": 50}, + llm_config={"model": "gpt-4", "temperature": 0.7}, + ) + assert trace.org_id == "org-123" + assert trace.user_id == "user-123" + assert trace.compaction_settings == {"mode": "sliding_window", "target_message_count": 50} + assert trace.llm_config == {"model": "gpt-4", "temperature": 0.7} + + def test_v2_fields_mutually_exclusive_by_convention(self): + """Test that compaction_settings is set for summarization, llm_config for non-summarization.""" + summarization_trace = ProviderTrace( + request_json={}, + 
response_json={}, + step_id="step-123", + call_type="summarization", + compaction_settings={"mode": "partial_evict"}, + llm_config=None, + ) + assert summarization_trace.call_type == "summarization" + assert summarization_trace.compaction_settings is not None + assert summarization_trace.llm_config is None + + agent_step_trace = ProviderTrace( + request_json={}, + response_json={}, + step_id="step-456", + call_type="agent_step", + compaction_settings=None, + llm_config={"model": "claude-3"}, + ) + assert agent_step_trace.call_type == "agent_step" + assert agent_step_trace.compaction_settings is None + assert agent_step_trace.llm_config is not None + class TestSocketProviderTraceBackend: """Tests for SocketProviderTraceBackend.""" @@ -171,12 +213,11 @@ class TestSocketProviderTraceBackend: assert len(received_data) == 1 record = json.loads(received_data[0].strip()) assert record["provider_trace_id"] == sample_provider_trace.id - assert record["model"] == "gpt-4o-mini" - assert record["provider"] == "openai" - assert record["input_tokens"] == 10 - assert record["output_tokens"] == 5 - assert record["context"]["step_id"] == "step-test-789" - assert record["context"]["run_id"] == "run-test-abc" + assert record["step_id"] == "step-test-789" + assert record["run_id"] == "run-test-abc" + assert record["request"]["model"] == "gpt-4o-mini" + assert record["response"]["usage"]["prompt_tokens"] == 10 + assert record["response"]["usage"]["completion_tokens"] == 5 def test_send_to_nonexistent_socket_does_not_raise(self, sample_provider_trace): """Test that sending to nonexistent socket fails silently.""" @@ -247,6 +288,36 @@ class TestSocketProviderTraceBackend: assert captured_records[0]["error"] == "Rate limit exceeded" assert captured_records[0]["response"] is None + def test_record_includes_v2_protocol_fields(self): + """Test that v2 protocol fields are included in the socket record.""" + trace = ProviderTrace( + request_json={"model": "gpt-4"}, + response_json={"id": 
"test"}, + step_id="step-123", + org_id="org-456", + user_id="user-456", + compaction_settings={"mode": "sliding_window"}, + llm_config={"model": "gpt-4", "temperature": 0.5}, + ) + + backend = SocketProviderTraceBackend(socket_path="/fake/path") + + captured_records = [] + + def capture_record(record): + captured_records.append(record) + + with patch.object(backend, "_send_async", side_effect=capture_record): + backend._send_to_crouton(trace) + + assert len(captured_records) == 1 + record = captured_records[0] + assert record["protocol_version"] == 2 + assert record["org_id"] == "org-456" + assert record["user_id"] == "user-456" + assert record["compaction_settings"] == {"mode": "sliding_window"} + assert record["llm_config"] == {"model": "gpt-4", "temperature": 0.5} + class TestBackendFactory: """Tests for backend factory.""" diff --git a/tests/test_provider_trace_summarization.py b/tests/test_provider_trace_summarization.py new file mode 100644 index 00000000..3f114736 --- /dev/null +++ b/tests/test_provider_trace_summarization.py @@ -0,0 +1,431 @@ +""" +Unit tests for summarization provider trace telemetry context. + +These tests verify that summarization LLM calls correctly pass telemetry context +(agent_id, agent_tags, run_id, step_id) to the provider trace system. 
+""" + +import uuid +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from letta.schemas.agent import AgentState +from letta.schemas.block import Block +from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.enums import MessageRole +from letta.schemas.llm_config import LLMConfig +from letta.schemas.message import Message +from letta.schemas.user import User +from letta.services.summarizer import summarizer_all, summarizer_sliding_window +from letta.services.summarizer.enums import SummarizationMode +from letta.services.summarizer.summarizer import Summarizer, simple_summary +from letta.services.summarizer.summarizer_config import CompactionSettings + + +@pytest.fixture +def mock_actor(): + """Create a mock user/actor.""" + return User( + id=f"user-{uuid.uuid4()}", + organization_id=f"org-{uuid.uuid4()}", + name="test_user", + ) + + +@pytest.fixture +def mock_llm_config(): + """Create a mock LLM config.""" + return LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8000, + ) + + +@pytest.fixture +def mock_agent_state(mock_llm_config): + """Create a mock agent state.""" + agent_id = f"agent-{uuid.uuid4()}" + return AgentState( + id=agent_id, + name="test_agent", + llm_config=mock_llm_config, + embedding_config=EmbeddingConfig.default_config(provider="openai"), + tags=["env:test", "team:ml"], + memory=MagicMock( + compile=MagicMock(return_value="Memory content"), + ), + message_ids=[], + tool_ids=[], + system="You are a helpful assistant.", + ) + + +@pytest.fixture +def mock_messages(): + """Create mock messages for summarization.""" + agent_id = f"agent-{uuid.uuid4()}" + messages = [] + for i in range(10): + msg = Message( + id=f"message-{uuid.uuid4()}", + role=MessageRole.user if i % 2 == 0 else MessageRole.assistant, + content=[{"type": "text", "text": f"Message content {i}"}], + agent_id=agent_id, + ) + messages.append(msg) + return 
messages + + +class TestSimpleSummaryTelemetryContext: + """Tests for simple_summary telemetry context passing.""" + + @pytest.mark.asyncio + async def test_simple_summary_accepts_telemetry_params(self, mock_messages, mock_llm_config, mock_actor): + """Verify simple_summary accepts all telemetry context parameters.""" + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["tag1", "tag2"] + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + + with patch("letta.services.summarizer.summarizer.LLMClient") as mock_client_class: + mock_client = MagicMock() + mock_client.set_telemetry_context = MagicMock() + mock_client.send_llm_request_async = AsyncMock(return_value=MagicMock(content="Summary of conversation")) + mock_client_class.create.return_value = mock_client + + try: + await simple_summary( + messages=mock_messages, + llm_config=mock_llm_config, + actor=mock_actor, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + ) + except Exception: + pass + + mock_client.set_telemetry_context.assert_called_once() + call_kwargs = mock_client.set_telemetry_context.call_args[1] + assert call_kwargs["agent_id"] == agent_id + assert call_kwargs["agent_tags"] == agent_tags + assert call_kwargs["run_id"] == run_id + assert call_kwargs["step_id"] == step_id + assert call_kwargs["call_type"] == "summarization" + + +class TestSummarizeAllTelemetryContext: + """Tests for summarize_all telemetry context passing.""" + + @pytest.fixture + def mock_compaction_settings(self): + """Create mock compaction settings.""" + return CompactionSettings(model="openai/gpt-4o-mini") + + @pytest.mark.asyncio + async def test_summarize_all_passes_telemetry_to_simple_summary( + self, mock_messages, mock_llm_config, mock_actor, mock_compaction_settings + ): + """Verify summarize_all passes telemetry context to simple_summary.""" + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["env:prod", "team:core"] + run_id = f"run-{uuid.uuid4()}" + step_id = 
f"step-{uuid.uuid4()}" + + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch.object(summarizer_all, "simple_summary", new=capture_simple_summary): + await summarizer_all.summarize_all( + actor=mock_actor, + llm_config=mock_llm_config, + summarizer_config=mock_compaction_settings, + in_context_messages=mock_messages, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + ) + + assert captured_kwargs.get("agent_id") == agent_id + assert captured_kwargs.get("agent_tags") == agent_tags + assert captured_kwargs.get("run_id") == run_id + assert captured_kwargs.get("step_id") == step_id + + @pytest.mark.asyncio + async def test_summarize_all_without_telemetry_params(self, mock_messages, mock_llm_config, mock_actor, mock_compaction_settings): + """Verify summarize_all works without telemetry params (backwards compatible).""" + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch.object(summarizer_all, "simple_summary", new=capture_simple_summary): + await summarizer_all.summarize_all( + actor=mock_actor, + llm_config=mock_llm_config, + summarizer_config=mock_compaction_settings, + in_context_messages=mock_messages, + ) + + assert captured_kwargs.get("agent_id") is None + assert captured_kwargs.get("agent_tags") is None + assert captured_kwargs.get("run_id") is None + assert captured_kwargs.get("step_id") is None + + +class TestSummarizeSlidingWindowTelemetryContext: + """Tests for summarize_via_sliding_window telemetry context passing.""" + + @pytest.fixture + def mock_compaction_settings(self): + """Create mock compaction settings.""" + return CompactionSettings(model="openai/gpt-4o-mini") + + @pytest.mark.asyncio + async def test_sliding_window_passes_telemetry_to_simple_summary( + self, mock_messages, mock_llm_config, mock_actor, 
mock_compaction_settings + ): + """Verify summarize_via_sliding_window passes telemetry context to simple_summary.""" + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["version:v2"] + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch.object(summarizer_sliding_window, "simple_summary", new=capture_simple_summary): + await summarizer_sliding_window.summarize_via_sliding_window( + actor=mock_actor, + llm_config=mock_llm_config, + summarizer_config=mock_compaction_settings, + in_context_messages=mock_messages, + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + ) + + assert captured_kwargs.get("agent_id") == agent_id + assert captured_kwargs.get("agent_tags") == agent_tags + assert captured_kwargs.get("run_id") == run_id + assert captured_kwargs.get("step_id") == step_id + + +class TestSummarizerClassTelemetryContext: + """Tests for Summarizer class telemetry context passing.""" + + @pytest.mark.asyncio + async def test_summarizer_summarize_passes_runtime_telemetry(self, mock_messages, mock_actor): + """Verify Summarizer.summarize() passes runtime run_id/step_id to the underlying call.""" + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + agent_id = f"agent-{uuid.uuid4()}" + + mock_agent_manager = MagicMock() + mock_agent_manager.get_agent_by_id_async = AsyncMock( + return_value=MagicMock( + llm_config=LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8000, + ), + tags=["test-tag"], + ) + ) + + summarizer = Summarizer( + mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER, + summarizer_agent=None, + message_buffer_limit=100, + message_buffer_min=10, + partial_evict_summarizer_percentage=0.5, + agent_manager=mock_agent_manager, + message_manager=MagicMock(), + 
actor=mock_actor, + agent_id=agent_id, + ) + + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch("letta.services.summarizer.summarizer.simple_summary", new=capture_simple_summary): + try: + await summarizer.summarize( + in_context_messages=mock_messages, + new_letta_messages=[], + force=True, + run_id=run_id, + step_id=step_id, + ) + except Exception: + pass + + if captured_kwargs: + assert captured_kwargs.get("run_id") == run_id + assert captured_kwargs.get("step_id") == step_id + + @pytest.mark.asyncio + async def test_summarizer_uses_constructor_telemetry_as_default(self, mock_messages, mock_actor): + """Verify Summarizer uses constructor run_id/step_id when not passed to summarize().""" + constructor_run_id = f"run-{uuid.uuid4()}" + constructor_step_id = f"step-{uuid.uuid4()}" + agent_id = f"agent-{uuid.uuid4()}" + + mock_agent_manager = MagicMock() + mock_agent_manager.get_agent_by_id_async = AsyncMock( + return_value=MagicMock( + llm_config=LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8000, + ), + tags=["test-tag"], + ) + ) + + summarizer = Summarizer( + mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER, + summarizer_agent=None, + message_buffer_limit=100, + message_buffer_min=10, + partial_evict_summarizer_percentage=0.5, + agent_manager=mock_agent_manager, + message_manager=MagicMock(), + actor=mock_actor, + agent_id=agent_id, + run_id=constructor_run_id, + step_id=constructor_step_id, + ) + + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch("letta.services.summarizer.summarizer.simple_summary", new=capture_simple_summary): + try: + await summarizer.summarize( + in_context_messages=mock_messages, + new_letta_messages=[], + force=True, + ) + except Exception: + pass + + if 
captured_kwargs: + assert captured_kwargs.get("run_id") == constructor_run_id + assert captured_kwargs.get("step_id") == constructor_step_id + + @pytest.mark.asyncio + async def test_summarizer_runtime_overrides_constructor_telemetry(self, mock_messages, mock_actor): + """Verify runtime run_id/step_id override constructor values.""" + constructor_run_id = f"run-constructor-{uuid.uuid4()}" + constructor_step_id = f"step-constructor-{uuid.uuid4()}" + runtime_run_id = f"run-runtime-{uuid.uuid4()}" + runtime_step_id = f"step-runtime-{uuid.uuid4()}" + agent_id = f"agent-{uuid.uuid4()}" + + mock_agent_manager = MagicMock() + mock_agent_manager.get_agent_by_id_async = AsyncMock( + return_value=MagicMock( + llm_config=LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8000, + ), + tags=["test-tag"], + ) + ) + + summarizer = Summarizer( + mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER, + summarizer_agent=None, + message_buffer_limit=100, + message_buffer_min=10, + partial_evict_summarizer_percentage=0.5, + agent_manager=mock_agent_manager, + message_manager=MagicMock(), + actor=mock_actor, + agent_id=agent_id, + run_id=constructor_run_id, + step_id=constructor_step_id, + ) + + captured_kwargs = {} + + async def capture_simple_summary(*args, **kwargs): + captured_kwargs.update(kwargs) + return "Mocked summary" + + with patch("letta.services.summarizer.summarizer.simple_summary", new=capture_simple_summary): + try: + await summarizer.summarize( + in_context_messages=mock_messages, + new_letta_messages=[], + force=True, + run_id=runtime_run_id, + step_id=runtime_step_id, + ) + except Exception: + pass + + if captured_kwargs: + assert captured_kwargs.get("run_id") == runtime_run_id + assert captured_kwargs.get("step_id") == runtime_step_id + + +class TestLLMClientTelemetryContext: + """Tests for LLM client telemetry context setting.""" + + def 
test_llm_client_set_telemetry_context_method_exists(self): + """Verify LLMClient has set_telemetry_context method.""" + from letta.llm_api.llm_client import LLMClient + + client = LLMClient.create( + provider_type="openai", + put_inner_thoughts_first=True, + ) + assert hasattr(client, "set_telemetry_context") + + def test_llm_client_set_telemetry_context_accepts_all_params(self): + """Verify set_telemetry_context accepts all telemetry parameters.""" + from letta.llm_api.llm_client import LLMClient + + client = LLMClient.create( + provider_type="openai", + put_inner_thoughts_first=True, + ) + + agent_id = f"agent-{uuid.uuid4()}" + agent_tags = ["tag1", "tag2"] + run_id = f"run-{uuid.uuid4()}" + step_id = f"step-{uuid.uuid4()}" + call_type = "summarization" + + client.set_telemetry_context( + agent_id=agent_id, + agent_tags=agent_tags, + run_id=run_id, + step_id=step_id, + call_type=call_type, + ) diff --git a/tests/test_providers.py b/tests/test_providers.py index aebd2b93..508e23e6 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -11,8 +11,10 @@ from letta.schemas.providers import ( GoogleAIProvider, GoogleVertexProvider, GroqProvider, + MiniMaxProvider, OllamaProvider, OpenAIProvider, + SGLangProvider, TogetherProvider, VLLMProvider, ZAIProvider, @@ -130,6 +132,32 @@ async def test_groq(): assert models[0].handle == f"{provider.name}/{models[0].model}" +@pytest.mark.asyncio +async def test_minimax(): + """Test MiniMax provider - uses hardcoded model list, no API key required.""" + provider = MiniMaxProvider(name="minimax") + models = await provider.list_llm_models_async() + + # Should have exactly 3 models: M2.1, M2.1-lightning, M2 + assert len(models) == 3 + + # Verify model properties + model_names = {m.model for m in models} + assert "MiniMax-M2.1" in model_names + assert "MiniMax-M2.1-lightning" in model_names + assert "MiniMax-M2" in model_names + + # Verify handle format + for model in models: + assert model.handle == 
f"{provider.name}/{model.model}" + # All MiniMax models have 200K context window + assert model.context_window == 200000 + # All MiniMax models have 128K max output + assert model.max_tokens == 128000 + # MiniMax uses Anthropic-compatible API endpoint + assert model.model_endpoint_type == "minimax" + + @pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.") @pytest.mark.asyncio async def test_azure(): @@ -199,6 +227,18 @@ async def test_vllm(): assert len(embedding_models) == 0 # embedding models currently not supported by vLLM +@pytest.mark.skipif(model_settings.sglang_api_base is None, reason="Only run if SGLANG_API_BASE is set.") +@pytest.mark.asyncio +async def test_sglang(): + provider = SGLangProvider(name="sglang", base_url=model_settings.sglang_api_base) + models = await provider.list_llm_models_async() + assert len(models) > 0 + assert models[0].handle == f"{provider.name}/{models[0].model}" + + embedding_models = await provider.list_embedding_models_async() + assert len(embedding_models) == 0 # embedding models currently not supported by SGLang + + # TODO: Add back in, difficulty adding this to CI properly, need boto credentials # def test_anthropic_bedrock(): # from letta.settings import model_settings diff --git a/tests/test_server_providers.py b/tests/test_server_providers.py index c3ca87da..e628497d 100644 --- a/tests/test_server_providers.py +++ b/tests/test_server_providers.py @@ -2292,6 +2292,7 @@ async def test_server_list_llm_models_byok_from_provider_api(default_user, provi # Create a mock typed provider that returns our test models mock_typed_provider = MagicMock() mock_typed_provider.list_llm_models_async = AsyncMock(return_value=mock_byok_models) + mock_typed_provider.list_embedding_models_async = AsyncMock(return_value=[]) # Patch cast_to_subtype on the Provider class to return our mock with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider): @@ -2525,3 +2526,717 @@ async 
def test_create_agent_with_byok_handle_dynamic_fetch(default_user, provide # Cleanup await server.agent_manager.delete_agent_async(agent_id=agent.id, actor=default_user) + + +@pytest.mark.asyncio +async def test_byok_provider_last_synced_triggers_sync_when_null(default_user, provider_manager): + """Test that BYOK providers with last_synced=null trigger a sync on first model listing.""" + from letta.schemas.providers import Provider + from letta.server.server import SyncServer + + test_id = generate_test_id() + + # Create a BYOK provider (last_synced will be null by default) + byok_provider_create = ProviderCreate( + name=f"test-byok-sync-{test_id}", + provider_type=ProviderType.openai, + api_key="sk-byok-key", + ) + byok_provider = await provider_manager.create_provider_async(byok_provider_create, actor=default_user, is_byok=True) + + # Verify last_synced is null initially + assert byok_provider.last_synced is None + + # Create server + server = SyncServer(init_with_default_org_and_user=False) + server.default_user = default_user + server.provider_manager = provider_manager + server._enabled_providers = [] + + # Mock the BYOK provider's list_llm_models_async to return test models + mock_byok_models = [ + LLMConfig( + model=f"byok-gpt-4o-{test_id}", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=64000, + handle=f"test-byok-sync-{test_id}/gpt-4o", + provider_name=byok_provider.name, + provider_category=ProviderCategory.byok, + ) + ] + + mock_typed_provider = MagicMock() + mock_typed_provider.list_llm_models_async = AsyncMock(return_value=mock_byok_models) + mock_typed_provider.list_embedding_models_async = AsyncMock(return_value=[]) + + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider): + # List BYOK models - should trigger sync because last_synced is null + byok_models = await server.list_llm_models_async( + actor=default_user, + provider_category=[ProviderCategory.byok], + ) + + # Verify 
sync was triggered (cast_to_subtype was called to fetch from API) + # Note: may be called multiple times if other BYOK providers exist in DB + mock_typed_provider.list_llm_models_async.assert_called() + + # Verify last_synced was updated for our provider + updated_providers = await provider_manager.list_providers_async(name=byok_provider.name, actor=default_user) + assert len(updated_providers) == 1 + assert updated_providers[0].last_synced is not None + + # Verify models were synced to database + synced_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(synced_models) == 1 + assert synced_models[0].name == f"byok-gpt-4o-{test_id}" + + +@pytest.mark.asyncio +async def test_byok_provider_last_synced_skips_sync_when_set(default_user, provider_manager): + """Test that BYOK providers with last_synced set skip sync and read from DB.""" + from datetime import datetime, timezone + + from letta.schemas.providers import Provider + from letta.server.server import SyncServer + + test_id = generate_test_id() + + # Create a BYOK provider + byok_provider_create = ProviderCreate( + name=f"test-byok-cached-{test_id}", + provider_type=ProviderType.openai, + api_key="sk-byok-key", + ) + byok_provider = await provider_manager.create_provider_async(byok_provider_create, actor=default_user, is_byok=True) + + # Manually sync models to DB + cached_model = LLMConfig( + model=f"cached-gpt-4o-{test_id}", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=64000, + handle=f"test-byok-cached-{test_id}/gpt-4o", + provider_name=byok_provider.name, + provider_category=ProviderCategory.byok, + ) + await provider_manager.sync_provider_models_async( + provider=byok_provider, + llm_models=[cached_model], + embedding_models=[], + organization_id=default_user.organization_id, + ) + + # Set last_synced to indicate models are already synced + await 
provider_manager.update_provider_last_synced_async(byok_provider.id, actor=default_user) + + # Create server + server = SyncServer(init_with_default_org_and_user=False) + server.default_user = default_user + server.provider_manager = provider_manager + server._enabled_providers = [] + + # Mock cast_to_subtype - should NOT be called since last_synced is set + mock_typed_provider = MagicMock() + mock_typed_provider.list_llm_models_async = AsyncMock(return_value=[]) + mock_typed_provider.list_embedding_models_async = AsyncMock(return_value=[]) + + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider): + # List BYOK models - should read from DB, not trigger sync + byok_models = await server.list_llm_models_async( + actor=default_user, + provider_category=[ProviderCategory.byok], + ) + + # Verify sync was NOT triggered (cast_to_subtype should not be called) + mock_typed_provider.list_llm_models_async.assert_not_called() + + # Verify we got the cached model from DB + byok_handles = [m.handle for m in byok_models] + assert f"test-byok-cached-{test_id}/gpt-4o" in byok_handles + + +@pytest.mark.asyncio +async def test_base_provider_updates_last_synced_on_sync(default_user, provider_manager): + """Test that base provider sync updates the last_synced timestamp.""" + from letta.server.server import SyncServer + + test_id = generate_test_id() + + # Create a base provider + base_provider_create = ProviderCreate( + name=f"test-base-sync-{test_id}", + provider_type=ProviderType.openai, + api_key="", # Base providers don't store API keys + ) + base_provider = await provider_manager.create_provider_async(base_provider_create, actor=default_user, is_byok=False) + + # Verify last_synced is null initially + assert base_provider.last_synced is None + + # Sync models for the base provider + base_model = LLMConfig( + model=f"base-gpt-4o-{test_id}", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=64000, + 
handle=f"test-base-sync-{test_id}/gpt-4o", + ) + await provider_manager.sync_provider_models_async( + provider=base_provider, + llm_models=[base_model], + embedding_models=[], + organization_id=None, + ) + await provider_manager.update_provider_last_synced_async(base_provider.id, actor=default_user) + + # Verify last_synced was updated + updated_providers = await provider_manager.list_providers_async(name=base_provider.name, actor=default_user) + assert len(updated_providers) == 1 + assert updated_providers[0].last_synced is not None + + +@pytest.mark.asyncio +async def test_byok_provider_models_synced_on_creation(default_user, provider_manager): + """Test that models are automatically synced when a BYOK provider is created. + + When create_provider_async is called with is_byok=True, it should: + 1. Create the provider in the database + 2. Call _sync_default_models_for_provider to fetch and persist models from the provider API + 3. Update last_synced timestamp + """ + from letta.schemas.providers import Provider + + test_id = generate_test_id() + + # Mock models that the provider API would return + mock_llm_models = [ + LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-byok-creation-{test_id}/gpt-4o", + provider_name=f"test-byok-creation-{test_id}", + provider_category=ProviderCategory.byok, + ), + LLMConfig( + model="gpt-4o-mini", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-byok-creation-{test_id}/gpt-4o-mini", + provider_name=f"test-byok-creation-{test_id}", + provider_category=ProviderCategory.byok, + ), + ] + mock_embedding_models = [ + EmbeddingConfig( + embedding_model="text-embedding-3-small", + embedding_endpoint_type="openai", + embedding_endpoint="https://api.openai.com/v1", + embedding_dim=1536, + embedding_chunk_size=300, + 
handle=f"test-byok-creation-{test_id}/text-embedding-3-small", + ), + ] + + # Create a mock typed provider that returns our test models + mock_typed_provider = MagicMock() + mock_typed_provider.list_llm_models_async = AsyncMock(return_value=mock_llm_models) + mock_typed_provider.list_embedding_models_async = AsyncMock(return_value=mock_embedding_models) + + # Patch cast_to_subtype to return our mock when _sync_default_models_for_provider is called + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider): + # Create the BYOK provider - this should automatically sync models + byok_provider_create = ProviderCreate( + name=f"test-byok-creation-{test_id}", + provider_type=ProviderType.openai, + api_key="sk-test-key", + ) + byok_provider = await provider_manager.create_provider_async(byok_provider_create, actor=default_user, is_byok=True) + + # Verify the provider API was called during creation + mock_typed_provider.list_llm_models_async.assert_called_once() + mock_typed_provider.list_embedding_models_async.assert_called_once() + + # Re-fetch the provider to get the updated last_synced value + # (the returned object from create_provider_async is stale since last_synced is set after) + byok_provider = await provider_manager.get_provider_async(byok_provider.id, default_user) + + # Verify last_synced was set (indicating sync completed) + assert byok_provider.last_synced is not None + + # Verify LLM models were persisted to the database + synced_llm_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(synced_llm_models) == 2 + synced_llm_names = {m.name for m in synced_llm_models} + assert "gpt-4o" in synced_llm_names + assert "gpt-4o-mini" in synced_llm_names + + # Verify embedding models were persisted to the database + synced_embedding_models = await provider_manager.list_models_async( + actor=default_user, + model_type="embedding", + 
provider_id=byok_provider.id, + ) + assert len(synced_embedding_models) == 1 + assert synced_embedding_models[0].name == "text-embedding-3-small" + + +@pytest.mark.asyncio +async def test_refresh_byok_provider_adds_new_models(default_user, provider_manager): + """Test that refreshing a BYOK provider adds new models from the provider API. + + When _sync_default_models_for_provider is called (via refresh endpoint): + 1. It should fetch current models from the provider API + 2. Add any new models that weren't previously synced + 3. Update the last_synced timestamp + """ + from letta.schemas.providers import Provider + + test_id = generate_test_id() + + # Initial models when provider is created + initial_models = [ + LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-refresh-add-{test_id}/gpt-4o", + provider_name=f"test-refresh-add-{test_id}", + provider_category=ProviderCategory.byok, + ), + ] + + # Updated models after refresh (includes a new model) + updated_models = [ + LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-refresh-add-{test_id}/gpt-4o", + provider_name=f"test-refresh-add-{test_id}", + provider_category=ProviderCategory.byok, + ), + LLMConfig( + model="gpt-4.1", # New model added by provider + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=256000, + handle=f"test-refresh-add-{test_id}/gpt-4.1", + provider_name=f"test-refresh-add-{test_id}", + provider_category=ProviderCategory.byok, + ), + ] + + # Create mock for initial sync during provider creation + mock_typed_provider_initial = MagicMock() + mock_typed_provider_initial.list_llm_models_async = AsyncMock(return_value=initial_models) + mock_typed_provider_initial.list_embedding_models_async = AsyncMock(return_value=[]) + + # Create the provider with initial models + 
with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider_initial): + byok_provider_create = ProviderCreate( + name=f"test-refresh-add-{test_id}", + provider_type=ProviderType.openai, + api_key="sk-test-key", + ) + byok_provider = await provider_manager.create_provider_async(byok_provider_create, actor=default_user, is_byok=True) + + # Re-fetch the provider to get the updated last_synced value + byok_provider = await provider_manager.get_provider_async(byok_provider.id, default_user) + + # Verify initial sync - should have 1 model + initial_synced_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(initial_synced_models) == 1 + assert initial_synced_models[0].name == "gpt-4o" + + initial_last_synced = byok_provider.last_synced + assert initial_last_synced is not None # Verify sync happened during creation + + # Create mock for refresh with updated models + mock_typed_provider_refresh = MagicMock() + mock_typed_provider_refresh.list_llm_models_async = AsyncMock(return_value=updated_models) + mock_typed_provider_refresh.list_embedding_models_async = AsyncMock(return_value=[]) + + # Refresh the provider (simulating what the endpoint does) + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider_refresh): + await provider_manager._sync_default_models_for_provider(byok_provider, default_user) + + # Verify the API was called during refresh + mock_typed_provider_refresh.list_llm_models_async.assert_called_once() + + # Verify new model was added + refreshed_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(refreshed_models) == 2 + refreshed_names = {m.name for m in refreshed_models} + assert "gpt-4o" in refreshed_names + assert "gpt-4.1" in refreshed_names + + # Verify last_synced was updated + updated_provider = await 
provider_manager.get_provider_async(byok_provider.id, default_user) + assert updated_provider.last_synced is not None + assert updated_provider.last_synced >= initial_last_synced + + +@pytest.mark.asyncio +async def test_refresh_byok_provider_removes_old_models(default_user, provider_manager): + """Test that refreshing a BYOK provider removes models no longer available from the provider API. + + When _sync_default_models_for_provider is called (via refresh endpoint): + 1. It should fetch current models from the provider API + 2. Remove any models that are no longer available (soft delete) + 3. Keep models that are still available + """ + from letta.schemas.providers import Provider + + test_id = generate_test_id() + + # Initial models when provider is created (includes a model that will be removed) + initial_models = [ + LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-refresh-remove-{test_id}/gpt-4o", + provider_name=f"test-refresh-remove-{test_id}", + provider_category=ProviderCategory.byok, + ), + LLMConfig( + model="gpt-4-turbo", # This model will be deprecated/removed + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-refresh-remove-{test_id}/gpt-4-turbo", + provider_name=f"test-refresh-remove-{test_id}", + provider_category=ProviderCategory.byok, + ), + ] + + # Updated models after refresh (gpt-4-turbo is no longer available) + updated_models = [ + LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=f"test-refresh-remove-{test_id}/gpt-4o", + provider_name=f"test-refresh-remove-{test_id}", + provider_category=ProviderCategory.byok, + ), + ] + + # Create mock for initial sync during provider creation + mock_typed_provider_initial = MagicMock() + mock_typed_provider_initial.list_llm_models_async = 
AsyncMock(return_value=initial_models) + mock_typed_provider_initial.list_embedding_models_async = AsyncMock(return_value=[]) + + # Create the provider with initial models + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider_initial): + byok_provider_create = ProviderCreate( + name=f"test-refresh-remove-{test_id}", + provider_type=ProviderType.openai, + api_key="sk-test-key", + ) + byok_provider = await provider_manager.create_provider_async(byok_provider_create, actor=default_user, is_byok=True) + + # Verify initial sync - should have 2 models + initial_synced_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(initial_synced_models) == 2 + initial_names = {m.name for m in initial_synced_models} + assert "gpt-4o" in initial_names + assert "gpt-4-turbo" in initial_names + + # Create mock for refresh with fewer models + mock_typed_provider_refresh = MagicMock() + mock_typed_provider_refresh.list_llm_models_async = AsyncMock(return_value=updated_models) + mock_typed_provider_refresh.list_embedding_models_async = AsyncMock(return_value=[]) + + # Refresh the provider (simulating what the endpoint does) + with patch.object(Provider, "cast_to_subtype", return_value=mock_typed_provider_refresh): + await provider_manager._sync_default_models_for_provider(byok_provider, default_user) + + # Verify the removed model is no longer in the list + refreshed_models = await provider_manager.list_models_async( + actor=default_user, + model_type="llm", + provider_id=byok_provider.id, + ) + assert len(refreshed_models) == 1 + assert refreshed_models[0].name == "gpt-4o" + + # Verify gpt-4-turbo was removed (soft deleted) + refreshed_names = {m.name for m in refreshed_models} + assert "gpt-4-turbo" not in refreshed_names + + +@pytest.mark.asyncio +async def test_refresh_base_provider_fails(default_user, provider_manager): + """Test that attempting to refresh a base 
provider returns an error. + + The refresh endpoint should only work for BYOK providers, not base providers. + Base providers are managed by environment variables and shouldn't be refreshed. + """ + from fastapi import HTTPException + + from letta.server.rest_api.routers.v1.providers import refresh_provider_models + from letta.server.server import SyncServer + + test_id = generate_test_id() + + # Create a base provider + base_provider_create = ProviderCreate( + name=f"test-base-refresh-{test_id}", + provider_type=ProviderType.openai, + api_key="", # Base providers don't store API keys + ) + base_provider = await provider_manager.create_provider_async(base_provider_create, actor=default_user, is_byok=False) + + # Verify it's a base provider + assert base_provider.provider_category == ProviderCategory.base + + # Create a mock server + server = SyncServer(init_with_default_org_and_user=False) + server.provider_manager = provider_manager + + # Create mock headers + mock_headers = MagicMock() + mock_headers.actor_id = default_user.id + + # Mock get_actor_or_default_async to return our test user + server.user_manager = MagicMock() + server.user_manager.get_actor_or_default_async = AsyncMock(return_value=default_user) + + # Attempt to refresh the base provider - should raise HTTPException + with pytest.raises(HTTPException) as exc_info: + await refresh_provider_models( + provider_id=base_provider.id, + headers=mock_headers, + server=server, + ) + + assert exc_info.value.status_code == 400 + assert "BYOK" in exc_info.value.detail + + +@pytest.mark.asyncio +async def test_get_model_by_handle_prioritizes_byok_over_base(default_user, provider_manager): + """Test that get_model_by_handle_async returns the BYOK model when both BYOK and base providers have the same handle. + + This tests the legacy scenario where a user has both a BYOK provider and a base provider + with the same name (and thus models with the same handle). 
The BYOK model should be + returned because it's organization-specific, while base models are global. + """ + test_id = generate_test_id() + provider_name = f"test-duplicate-{test_id}" + model_handle = f"{provider_name}/gpt-4o" + + # Step 1: Create a base provider and sync a model for it (global, organization_id=None) + base_provider_create = ProviderCreate( + name=provider_name, + provider_type=ProviderType.openai, + api_key="", # Base providers don't store API keys + ) + base_provider = await provider_manager.create_provider_async(base_provider_create, actor=default_user, is_byok=False) + assert base_provider.provider_category == ProviderCategory.base + + # Sync a model for the base provider (global model with organization_id=None) + base_llm_model = LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=model_handle, + provider_name=provider_name, + ) + await provider_manager.sync_provider_models_async( + provider=base_provider, + llm_models=[base_llm_model], + embedding_models=[], + organization_id=None, # Global model + ) + + # Verify base model was created + base_model = await provider_manager.get_model_by_handle_async( + handle=model_handle, + actor=default_user, + model_type="llm", + ) + assert base_model is not None + assert base_model.handle == model_handle + assert base_model.organization_id is None # Global model + + # Step 2: Create a BYOK provider with the same name (simulating legacy duplicate) + # Note: In production, this is now prevented, but legacy data could have this + # We need to bypass the name conflict check for this test (simulating legacy data) + # Create the BYOK provider directly by manipulating the database + from letta.orm.provider import Provider as ProviderORM + from letta.schemas.providers import Provider as PydanticProvider + from letta.server.db import db_registry + + # Create a pydantic provider first to generate an ID + byok_pydantic_provider 
= PydanticProvider( + name=provider_name, # Same name as base provider + provider_type=ProviderType.openai, + provider_category=ProviderCategory.byok, + organization_id=default_user.organization_id, + ) + byok_pydantic_provider.resolve_identifier() + + async with db_registry.async_session() as session: + byok_provider_orm = ProviderORM(**byok_pydantic_provider.model_dump(to_orm=True)) + await byok_provider_orm.create_async(session, actor=default_user) + byok_provider = byok_provider_orm.to_pydantic() + + assert byok_provider.provider_category == ProviderCategory.byok + + # Sync a model for the BYOK provider (org-specific model) + byok_llm_model = LLMConfig( + model="gpt-4o", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=128000, + handle=model_handle, # Same handle as base model + provider_name=provider_name, + provider_category=ProviderCategory.byok, + ) + await provider_manager.sync_provider_models_async( + provider=byok_provider, + llm_models=[byok_llm_model], + embedding_models=[], + organization_id=default_user.organization_id, # Org-specific model + ) + + # Step 3: Verify that get_model_by_handle_async returns the BYOK model (org-specific) + retrieved_model = await provider_manager.get_model_by_handle_async( + handle=model_handle, + actor=default_user, + model_type="llm", + ) + + assert retrieved_model is not None + assert retrieved_model.handle == model_handle + # The key assertion: org-specific (BYOK) model should be returned, not the global (base) model + assert retrieved_model.organization_id == default_user.organization_id + assert retrieved_model.provider_id == byok_provider.id + + +@pytest.mark.asyncio +async def test_byok_provider_uses_schema_default_base_url(default_user, provider_manager): + """Test that BYOK providers with schema-default base_url get correct model_endpoint. 
+ + This tests a bug where providers like ZAI have a schema-default base_url + (e.g., "https://api.z.ai/api/paas/v4/") that isn't stored in the database. + When list_llm_models_async reads from DB, the base_url is NULL, and if the code + uses provider.base_url directly instead of typed_provider.base_url, the + model_endpoint would be None/wrong, causing requests to go to the wrong endpoint. + + The fix uses cast_to_subtype() to get the typed provider with schema defaults. + """ + from letta.orm.provider import Provider as ProviderORM + from letta.schemas.providers import Provider as PydanticProvider + from letta.schemas.providers.zai import ZAIProvider + from letta.server.db import db_registry + + test_id = generate_test_id() + provider_name = f"test-zai-{test_id}" + + # Create a ZAI BYOK provider WITHOUT explicitly setting base_url + # This simulates what happens when a user creates a ZAI provider via the API + # The schema default "https://api.z.ai/api/paas/v4/" applies in memory but + # may not be stored in the database (base_url column is NULL) + byok_pydantic_provider = PydanticProvider( + name=provider_name, + provider_type=ProviderType.zai, + provider_category=ProviderCategory.byok, + organization_id=default_user.organization_id, + # NOTE: base_url is intentionally NOT set - this is the bug scenario + # The DB will have base_url=NULL + ) + byok_pydantic_provider.resolve_identifier() + + async with db_registry.async_session() as session: + byok_provider_orm = ProviderORM(**byok_pydantic_provider.model_dump(to_orm=True)) + await byok_provider_orm.create_async(session, actor=default_user) + byok_provider = byok_provider_orm.to_pydantic() + + # Verify base_url is None in the provider loaded from DB + assert byok_provider.base_url is None, "base_url should be NULL in DB for this test" + assert byok_provider.provider_type == ProviderType.zai + + # Sync a model for the provider (simulating what happens after provider creation) + # Set last_synced so the server 
reads from DB instead of calling provider API + from datetime import datetime, timezone + + async with db_registry.async_session() as session: + provider_orm = await ProviderORM.read_async(session, identifier=byok_provider.id, actor=None) + provider_orm.last_synced = datetime.now(timezone.utc) + await session.commit() + + model_handle = f"{provider_name}/glm-4-flash" + byok_llm_model = LLMConfig( + model="glm-4-flash", + model_endpoint_type="zai", + model_endpoint="https://api.z.ai/api/paas/v4/", # The correct endpoint + context_window=128000, + handle=model_handle, + provider_name=provider_name, + provider_category=ProviderCategory.byok, + ) + await provider_manager.sync_provider_models_async( + provider=byok_provider, + llm_models=[byok_llm_model], + embedding_models=[], + organization_id=default_user.organization_id, + ) + + # Create server and list LLM models + server = SyncServer(init_with_default_org_and_user=False) + server.default_user = default_user + server.provider_manager = provider_manager + + # List LLM models - this should use typed_provider.base_url (schema default) + # NOT provider.base_url (which is NULL in DB) + models = await server.list_llm_models_async( + actor=default_user, + provider_category=[ProviderCategory.byok], # Only BYOK providers + ) + + # Find our ZAI model + zai_models = [m for m in models if m.handle == model_handle] + assert len(zai_models) == 1, f"Expected 1 ZAI model, got {len(zai_models)}" + + zai_model = zai_models[0] + + # THE KEY ASSERTION: model_endpoint should be the ZAI schema default, + # NOT None (which would cause requests to go to OpenAI's endpoint) + expected_endpoint = "https://api.z.ai/api/paas/v4/" + assert zai_model.model_endpoint == expected_endpoint, ( + f"model_endpoint should be '{expected_endpoint}' from ZAI schema default, " + f"but got '{zai_model.model_endpoint}'. This indicates the bug where " + f"provider.base_url (NULL from DB) was used instead of typed_provider.base_url." 
+ ) diff --git a/tests/test_sources.py b/tests/test_sources.py index 2bf68edc..79555784 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -68,7 +68,8 @@ def client() -> LettaSDKClient: print("Starting server thread") thread = threading.Thread(target=run_server, daemon=True) thread.start() - wait_for_server(server_url) + # Use 60s timeout to allow for provider model syncing during server startup + wait_for_server(server_url, timeout=60) print("Running client tests with server:", server_url) client = LettaSDKClient(base_url=server_url) yield client diff --git a/tests/test_usage_parsing.py b/tests/test_usage_parsing.py new file mode 100644 index 00000000..0b9dc1c2 --- /dev/null +++ b/tests/test_usage_parsing.py @@ -0,0 +1,473 @@ +""" +Tests for usage statistics parsing through the production adapter path. + +These tests verify that SimpleLLMRequestAdapter correctly extracts usage statistics +from LLM responses, including: +1. Basic usage (prompt_tokens, completion_tokens, total_tokens) +2. Cache-related fields (cached_input_tokens, cache_write_tokens) +3. 
Reasoning tokens (for models that support it) + +This tests the actual production code path: + SimpleLLMRequestAdapter.invoke_llm() + → llm_client.request_async_with_telemetry() + → llm_client.convert_response_to_chat_completion() + → adapter extracts from chat_completions_response.usage + → normalize_cache_tokens() / normalize_reasoning_tokens() +""" + +import os + +import pytest + +from letta.adapters.simple_llm_request_adapter import SimpleLLMRequestAdapter +from letta.errors import LLMAuthenticationError +from letta.llm_api.anthropic_client import AnthropicClient +from letta.llm_api.google_ai_client import GoogleAIClient +from letta.llm_api.openai_client import OpenAIClient +from letta.schemas.enums import AgentType, MessageRole +from letta.schemas.letta_message_content import TextContent +from letta.schemas.llm_config import LLMConfig +from letta.schemas.message import Message +from letta.settings import model_settings + + +def _has_openai_credentials() -> bool: + return bool(model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")) + + +def _has_anthropic_credentials() -> bool: + return bool(model_settings.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY")) + + +def _has_gemini_credentials() -> bool: + return bool(model_settings.gemini_api_key or os.environ.get("GEMINI_API_KEY")) + + +def _build_simple_messages(user_content: str) -> list[Message]: + """Build a minimal message list for testing.""" + return [ + Message( + role=MessageRole.user, + content=[TextContent(text=user_content)], + ) + ] + + +# Large system prompt to exceed caching thresholds (>1024 tokens) +LARGE_SYSTEM_PROMPT = """You are an advanced AI assistant with extensive knowledge across multiple domains. 
+ +# Core Capabilities + +## Technical Knowledge +- Software Engineering: Expert in Python, JavaScript, TypeScript, Go, Rust, and many other languages +- System Design: Deep understanding of distributed systems, microservices, and cloud architecture +- DevOps: Proficient in Docker, Kubernetes, CI/CD pipelines, and infrastructure as code +- Databases: Experience with SQL (PostgreSQL, MySQL) and NoSQL (MongoDB, Redis, Cassandra) databases +- Machine Learning: Knowledge of neural networks, transformers, and modern ML frameworks + +## Problem Solving Approach +When tackling problems, you follow a structured methodology: +1. Understand the requirements thoroughly +2. Break down complex problems into manageable components +3. Consider multiple solution approaches +4. Evaluate trade-offs between different options +5. Implement solutions with clean, maintainable code +6. Test thoroughly and iterate based on feedback + +## Communication Style +- Clear and concise explanations +- Use examples and analogies when helpful +- Adapt technical depth to the audience +- Ask clarifying questions when requirements are ambiguous +- Provide context and rationale for recommendations + +# Domain Expertise + +## Web Development +You have deep knowledge of: +- Frontend: React, Vue, Angular, Next.js, modern CSS frameworks +- Backend: Node.js, Express, FastAPI, Django, Flask +- API Design: REST, GraphQL, gRPC +- Authentication: OAuth, JWT, session management +- Performance: Caching strategies, CDNs, lazy loading + +## Data Engineering +You understand: +- ETL pipelines and data transformation +- Data warehousing concepts (Snowflake, BigQuery, Redshift) +- Stream processing (Kafka, Kinesis) +- Data modeling and schema design +- Data quality and validation + +## Cloud Platforms +You're familiar with: +- AWS: EC2, S3, Lambda, RDS, DynamoDB, CloudFormation +- GCP: Compute Engine, Cloud Storage, Cloud Functions, BigQuery +- Azure: Virtual Machines, Blob Storage, Azure Functions +- Serverless 
architectures and best practices +- Cost optimization strategies + +## Security +You consider: +- Common vulnerabilities (OWASP Top 10) +- Secure coding practices +- Encryption and key management +- Access control and authorization patterns +- Security audit and compliance requirements + +# Interaction Principles + +## Helpfulness +- Provide actionable guidance +- Share relevant resources and documentation +- Offer multiple approaches when appropriate +- Point out potential pitfalls and edge cases +- Follow up to ensure understanding + +## Accuracy +- Acknowledge limitations and uncertainties +- Distinguish between facts and opinions +- Cite sources when making specific claims +- Correct mistakes promptly when identified +- Stay current with latest developments + +## Respect +- Value diverse perspectives and approaches +- Maintain professional boundaries +- Protect user privacy and confidentiality +- Avoid assumptions about user background +- Be patient with varying skill levels + +Remember: Your goal is to empower users to solve problems and learn, not just to provide answers.""" + + +@pytest.mark.asyncio +async def test_openai_usage_via_adapter(): + """Test OpenAI usage extraction through SimpleLLMRequestAdapter. + + This tests the actual production code path used by letta_agent_v3. 
+ """ + if not _has_openai_credentials(): + pytest.skip("OpenAI credentials not configured") + + client = OpenAIClient() + llm_config = LLMConfig.default_config("gpt-4o-mini") + + adapter = SimpleLLMRequestAdapter( + llm_client=client, + llm_config=llm_config, + ) + + messages = _build_simple_messages("Say hello in exactly 5 words.") + request_data = client.build_request_data(AgentType.letta_v1_agent, messages, llm_config) + + # Call through the adapter (production path) + try: + async for _ in adapter.invoke_llm( + request_data=request_data, + messages=messages, + tools=[], + use_assistant_message=False, + ): + pass + except LLMAuthenticationError: + pytest.skip("OpenAI credentials invalid") + + # Verify usage was extracted + assert adapter.usage is not None, "adapter.usage should not be None" + assert adapter.usage.prompt_tokens > 0, f"prompt_tokens should be > 0, got {adapter.usage.prompt_tokens}" + assert adapter.usage.completion_tokens > 0, f"completion_tokens should be > 0, got {adapter.usage.completion_tokens}" + assert adapter.usage.total_tokens > 0, f"total_tokens should be > 0, got {adapter.usage.total_tokens}" + assert adapter.usage.step_count == 1, f"step_count should be 1, got {adapter.usage.step_count}" + + print(f"OpenAI usage: prompt={adapter.usage.prompt_tokens}, completion={adapter.usage.completion_tokens}") + print(f"OpenAI cache: cached_input={adapter.usage.cached_input_tokens}, cache_write={adapter.usage.cache_write_tokens}") + print(f"OpenAI reasoning: {adapter.usage.reasoning_tokens}") + + +@pytest.mark.asyncio +async def test_anthropic_usage_via_adapter(): + """Test Anthropic usage extraction through SimpleLLMRequestAdapter. + + This tests the actual production code path used by letta_agent_v3. + + Note: Anthropic's input_tokens is NON-cached only. The adapter should + compute total prompt_tokens = input_tokens + cache_read + cache_creation. 
+ """ + if not _has_anthropic_credentials(): + pytest.skip("Anthropic credentials not configured") + + client = AnthropicClient() + llm_config = LLMConfig( + model="claude-3-5-haiku-20241022", + model_endpoint_type="anthropic", + model_endpoint="https://api.anthropic.com/v1", + context_window=200000, + max_tokens=256, + ) + + adapter = SimpleLLMRequestAdapter( + llm_client=client, + llm_config=llm_config, + ) + + # Anthropic requires a system message first + messages = [ + Message(role=MessageRole.system, content=[TextContent(text="You are a helpful assistant.")]), + Message(role=MessageRole.user, content=[TextContent(text="Say hello in exactly 5 words.")]), + ] + request_data = client.build_request_data(AgentType.letta_v1_agent, messages, llm_config, tools=[]) + + # Call through the adapter (production path) + try: + async for _ in adapter.invoke_llm( + request_data=request_data, + messages=messages, + tools=[], + use_assistant_message=False, + ): + pass + except LLMAuthenticationError: + pytest.skip("Anthropic credentials invalid") + + # Verify usage was extracted + assert adapter.usage is not None, "adapter.usage should not be None" + assert adapter.usage.prompt_tokens > 0, f"prompt_tokens should be > 0, got {adapter.usage.prompt_tokens}" + assert adapter.usage.completion_tokens > 0, f"completion_tokens should be > 0, got {adapter.usage.completion_tokens}" + assert adapter.usage.total_tokens > 0, f"total_tokens should be > 0, got {adapter.usage.total_tokens}" + assert adapter.usage.step_count == 1, f"step_count should be 1, got {adapter.usage.step_count}" + + print(f"Anthropic usage: prompt={adapter.usage.prompt_tokens}, completion={adapter.usage.completion_tokens}") + print(f"Anthropic cache: cached_input={adapter.usage.cached_input_tokens}, cache_write={adapter.usage.cache_write_tokens}") + + +@pytest.mark.asyncio +async def test_gemini_usage_via_adapter(): + """Test Gemini usage extraction through SimpleLLMRequestAdapter. 
+ + This tests the actual production code path used by letta_agent_v3. + """ + if not _has_gemini_credentials(): + pytest.skip("Gemini credentials not configured") + + client = GoogleAIClient() + llm_config = LLMConfig( + model="gemini-2.0-flash", + model_endpoint_type="google_ai", + model_endpoint="https://generativelanguage.googleapis.com", + context_window=1048576, + max_tokens=256, + ) + + adapter = SimpleLLMRequestAdapter( + llm_client=client, + llm_config=llm_config, + ) + + messages = _build_simple_messages("Say hello in exactly 5 words.") + request_data = client.build_request_data(AgentType.letta_v1_agent, messages, llm_config, tools=[]) + + # Call through the adapter (production path) + try: + async for _ in adapter.invoke_llm( + request_data=request_data, + messages=messages, + tools=[], + use_assistant_message=False, + ): + pass + except LLMAuthenticationError: + pytest.skip("Gemini credentials invalid") + + # Verify usage was extracted + assert adapter.usage is not None, "adapter.usage should not be None" + assert adapter.usage.prompt_tokens > 0, f"prompt_tokens should be > 0, got {adapter.usage.prompt_tokens}" + assert adapter.usage.completion_tokens > 0, f"completion_tokens should be > 0, got {adapter.usage.completion_tokens}" + assert adapter.usage.total_tokens > 0, f"total_tokens should be > 0, got {adapter.usage.total_tokens}" + assert adapter.usage.step_count == 1, f"step_count should be 1, got {adapter.usage.step_count}" + + print(f"Gemini usage: prompt={adapter.usage.prompt_tokens}, completion={adapter.usage.completion_tokens}") + print(f"Gemini cache: cached_input={adapter.usage.cached_input_tokens}") + print(f"Gemini reasoning: {adapter.usage.reasoning_tokens}") + + +@pytest.mark.asyncio +async def test_openai_prefix_caching_via_adapter(): + """Test OpenAI prefix caching through SimpleLLMRequestAdapter. + + Makes two requests with the same large system prompt to verify + cached_input_tokens is populated on the second request. 
+ + Note: Prefix caching is probabilistic and depends on server-side state. + """ + if not _has_openai_credentials(): + pytest.skip("OpenAI credentials not configured") + + client = OpenAIClient() + llm_config = LLMConfig.default_config("gpt-4o-mini") + + # First request - should populate the cache + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages1 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), + ] + request_data1 = client.build_request_data(AgentType.letta_v1_agent, messages1, llm_config) + + try: + async for _ in adapter1.invoke_llm(request_data=request_data1, messages=messages1, tools=[], use_assistant_message=False): + pass + except LLMAuthenticationError: + pytest.skip("OpenAI credentials invalid") + + print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}") + + # Second request - same system prompt, should hit cache + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages2 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]), + ] + request_data2 = client.build_request_data(AgentType.letta_v1_agent, messages2, llm_config) + + async for _ in adapter2.invoke_llm(request_data=request_data2, messages=messages2, tools=[], use_assistant_message=False): + pass + + print(f"Request 2 - prompt={adapter2.usage.prompt_tokens}, cached={adapter2.usage.cached_input_tokens}") + + # Verify basic usage + assert adapter2.usage.prompt_tokens > 0 + assert adapter2.usage.completion_tokens > 0 + + # Note: We can't guarantee cache hit, but if it happened, cached_input_tokens should be > 0 + if adapter2.usage.cached_input_tokens and adapter2.usage.cached_input_tokens > 0: + print(f"SUCCESS: OpenAI cache hit! 
cached_input_tokens={adapter2.usage.cached_input_tokens}") + else: + print("INFO: No cache hit (cache may not have been populated yet)") + + +@pytest.mark.asyncio +async def test_anthropic_prefix_caching_via_adapter(): + """Test Anthropic prefix caching through SimpleLLMRequestAdapter. + + Makes two requests with the same large system prompt using cache_control + to verify cache tokens are populated. + + Note: Anthropic requires explicit cache_control breakpoints. + """ + if not _has_anthropic_credentials(): + pytest.skip("Anthropic credentials not configured") + + client = AnthropicClient() + llm_config = LLMConfig( + model="claude-3-5-haiku-20241022", + model_endpoint_type="anthropic", + model_endpoint="https://api.anthropic.com/v1", + context_window=200000, + max_tokens=256, + ) + + # First request + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages1 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), + ] + request_data1 = client.build_request_data(AgentType.letta_v1_agent, messages1, llm_config, tools=[]) + + try: + async for _ in adapter1.invoke_llm(request_data=request_data1, messages=messages1, tools=[], use_assistant_message=False): + pass + except LLMAuthenticationError: + pytest.skip("Anthropic credentials invalid") + + print( + f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}, cache_write={adapter1.usage.cache_write_tokens}" + ) + + # Second request + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages2 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]), + ] + request_data2 = client.build_request_data(AgentType.letta_v1_agent, messages2, llm_config, tools=[]) + + async for _ in 
adapter2.invoke_llm(request_data=request_data2, messages=messages2, tools=[], use_assistant_message=False): + pass + + print( + f"Request 2 - prompt={adapter2.usage.prompt_tokens}, cached={adapter2.usage.cached_input_tokens}, cache_write={adapter2.usage.cache_write_tokens}" + ) + + # Verify basic usage + assert adapter2.usage.prompt_tokens > 0 + assert adapter2.usage.completion_tokens > 0 + + # Check for cache activity + if adapter2.usage.cached_input_tokens and adapter2.usage.cached_input_tokens > 0: + print(f"SUCCESS: Anthropic cache hit! cached_input_tokens={adapter2.usage.cached_input_tokens}") + elif adapter2.usage.cache_write_tokens and adapter2.usage.cache_write_tokens > 0: + print(f"INFO: Anthropic cache write! cache_write_tokens={adapter2.usage.cache_write_tokens}") + else: + print("INFO: No cache activity detected") + + +@pytest.mark.asyncio +async def test_gemini_prefix_caching_via_adapter(): + """Test Gemini prefix caching through SimpleLLMRequestAdapter. + + Makes two requests with the same large system prompt to verify + cached_input_tokens is populated. + + Note: Gemini 2.0+ has implicit caching. 
+ """ + if not _has_gemini_credentials(): + pytest.skip("Gemini credentials not configured") + + client = GoogleAIClient() + llm_config = LLMConfig( + model="gemini-2.0-flash", + model_endpoint_type="google_ai", + model_endpoint="https://generativelanguage.googleapis.com", + context_window=1048576, + max_tokens=256, + ) + + # First request + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages1 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), + ] + request_data1 = client.build_request_data(AgentType.letta_v1_agent, messages1, llm_config, tools=[]) + + try: + async for _ in adapter1.invoke_llm(request_data=request_data1, messages=messages1, tools=[], use_assistant_message=False): + pass + except LLMAuthenticationError: + pytest.skip("Gemini credentials invalid") + + print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}") + + # Second request + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + messages2 = [ + Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), + Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]), + ] + request_data2 = client.build_request_data(AgentType.letta_v1_agent, messages2, llm_config, tools=[]) + + async for _ in adapter2.invoke_llm(request_data=request_data2, messages=messages2, tools=[], use_assistant_message=False): + pass + + print(f"Request 2 - prompt={adapter2.usage.prompt_tokens}, cached={adapter2.usage.cached_input_tokens}") + + # Verify basic usage + assert adapter2.usage.prompt_tokens > 0 + assert adapter2.usage.completion_tokens > 0 + + if adapter2.usage.cached_input_tokens and adapter2.usage.cached_input_tokens > 0: + print(f"SUCCESS: Gemini cache hit! 
cached_input_tokens={adapter2.usage.cached_input_tokens}") + else: + print("INFO: No cache hit detected") diff --git a/tests/test_utils.py b/tests/test_utils.py index 37114aa7..3e23a0b8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,10 @@ import pytest from letta.constants import MAX_FILENAME_LENGTH +from letta.errors import LettaInvalidArgumentError from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source from letta.schemas.file import FileMetadata -from letta.server.rest_api.dependencies import HeaderParams +from letta.server.rest_api.dependencies import HeaderParams, get_headers from letta.services.file_processor.chunker.line_chunker import LineChunker from letta.services.helpers.agent_manager_helper import safe_format from letta.utils import is_1_0_sdk_version, sanitize_filename, validate_function_response @@ -11,6 +12,52 @@ from letta.utils import is_1_0_sdk_version, sanitize_filename, validate_function CORE_MEMORY_VAR = "My core memory is that I like to eat bananas" VARS_DICT = {"CORE_MEMORY": CORE_MEMORY_VAR} + +def test_get_headers_user_id_allows_none(): + headers = get_headers( + actor_id=None, + user_agent=None, + project_id=None, + letta_source=None, + sdk_version=None, + message_async=None, + letta_v1_agent=None, + letta_v1_agent_message_async=None, + modal_sandbox=None, + ) + assert isinstance(headers, HeaderParams) + + +def test_get_headers_user_id_rejects_invalid_format(): + with pytest.raises(LettaInvalidArgumentError, match="Invalid user ID format"): + get_headers( + actor_id="not-a-user-id", + user_agent=None, + project_id=None, + letta_source=None, + sdk_version=None, + message_async=None, + letta_v1_agent=None, + letta_v1_agent_message_async=None, + modal_sandbox=None, + ) + + +def test_get_headers_user_id_accepts_valid_format(): + headers = get_headers( + actor_id="user-123e4567-e89b-42d3-8456-426614174000", + user_agent=None, + project_id=None, + letta_source=None, + 
sdk_version=None, + message_async=None, + letta_v1_agent=None, + letta_v1_agent_message_async=None, + modal_sandbox=None, + ) + assert headers.actor_id == "user-123e4567-e89b-42d3-8456-426614174000" + + # ----------------------------------------------------------------------- # Example source code for testing multiple scenarios, including: # 1) A class-based custom type (which we won't handle properly). @@ -711,11 +758,13 @@ def test_sanitize_null_bytes_dict(): from letta.helpers.json_helpers import sanitize_null_bytes # Test nested dict with null bytes - result = sanitize_null_bytes({ - "key1": "value\x00with\x00nulls", - "key2": {"nested": "also\x00null"}, - "key3": 123, # non-string should be unchanged - }) + result = sanitize_null_bytes( + { + "key1": "value\x00with\x00nulls", + "key2": {"nested": "also\x00null"}, + "key3": 123, # non-string should be unchanged + } + ) assert result == { "key1": "valuewithnulls", "key2": {"nested": "alsonull"}, diff --git a/uv.lock b/uv.lock index 22b7d46b..8028a7a9 100644 --- a/uv.lock +++ b/uv.lock @@ -2510,13 +2510,15 @@ wheels = [ [[package]] name = "letta" -version = "0.16.2" +version = "0.16.4" source = { editable = "." 
} dependencies = [ + { name = "aiofiles" }, { name = "aiomultiprocess" }, { name = "alembic" }, { name = "anthropic" }, { name = "apscheduler" }, + { name = "async-lru" }, { name = "black", extra = ["jupyter"] }, { name = "brotli" }, { name = "certifi" }, @@ -2661,12 +2663,14 @@ sqlite = [ [package.metadata] requires-dist = [ { name = "aioboto3", marker = "extra == 'bedrock'", specifier = ">=14.3.0" }, + { name = "aiofiles", specifier = ">=24.1.0" }, { name = "aiomultiprocess", specifier = ">=0.9.1" }, { name = "aiosqlite", marker = "extra == 'desktop'", specifier = ">=0.21.0" }, { name = "aiosqlite", marker = "extra == 'sqlite'", specifier = ">=0.21.0" }, { name = "alembic", specifier = ">=1.13.3" }, { name = "anthropic", specifier = ">=0.75.0" }, { name = "apscheduler", specifier = ">=3.11.0" }, + { name = "async-lru", specifier = ">=2.0.5" }, { name = "async-lru", marker = "extra == 'desktop'", specifier = ">=2.0.5" }, { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" }, { name = "black", extras = ["jupyter"], specifier = ">=24.2.0" },