From c4699b3d17e990632014aaa64ce6128d5527b6c2 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Mon, 24 Nov 2025 15:30:44 -0800 Subject: [PATCH] feat: add support for opus 4.5 (#6256) * feat: add support for new model * fix: just stage-api && just publish-api (anthropic model settings changed) * fix: just stage-api && just publish-api (anthropic model settings changed) * fix: make kevlar have default reasoning on * fix: bump anthropic sdk version * fix: patch name * pin newer version anthropic --------- Co-authored-by: Ari Webb --- fern/openapi.json | 39 ++++++++++++++++++++ letta/llm_api/anthropic_client.py | 54 ++++++++++++++++++++++++++++ letta/schemas/llm_config.py | 6 ++++ letta/schemas/model.py | 7 ++++ letta/schemas/providers/anthropic.py | 5 +++ pyproject.toml | 2 +- uv.lock | 9 ++--- 7 files changed, 117 insertions(+), 5 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index 30c58aee..2168bd07 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -19789,6 +19789,19 @@ ], "title": "Verbosity", "description": "Soft control for how verbose model output should be, used for GPT-5 models." + }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'." } }, "type": "object", @@ -28905,6 +28918,19 @@ "description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.", "default": 0 }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'." + }, "frequency_penalty": { "anyOf": [ { @@ -31069,6 +31095,19 @@ "default": 0, "deprecated": true }, + "effort": { + "anyOf": [ + { + "type": "string", + "enum": ["low", "medium", "high"] + }, + { + "type": "null" + } + ], + "title": "Effort", + "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'." + }, "frequency_penalty": { "anyOf": [ { diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 21ec3473..9fdac426 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -72,6 +72,14 @@ class AnthropicClient(LLMClientBase): except Exception: pass + # Opus 4.5 effort parameter - to extend to other models, modify the model check + if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None: + betas.append("effort-2025-11-24") + + # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) + if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: + betas.append("context-management-2025-06-27") + if betas: response = client.beta.messages.create(**request_data, betas=betas) else: @@ -98,6 +106,14 @@ class AnthropicClient(LLMClientBase): except Exception: pass + # Opus 4.5 effort parameter - to extend to other models, modify the model check + if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None: + betas.append("effort-2025-11-24") + + # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) + if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: + betas.append("context-management-2025-06-27") + if betas: response = await client.beta.messages.create(**request_data, betas=betas) else: @@ -131,6 +147,14 @@ class AnthropicClient(LLMClientBase): except Exception: pass + # Opus 4.5 effort parameter - to extend to other models, modify the model check + if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None: + betas.append("effort-2025-11-24") + + # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) + if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: + betas.append("context-management-2025-06-27") + return await client.beta.messages.create(**request_data, betas=betas) @trace_method @@ -271,6 +295,23 @@ class AnthropicClient(LLMClientBase): # Silently disable prefix_fill for now prefix_fill = False + # Effort configuration for Opus 4.5 (controls token spending) + # To extend to other models, modify the model check + if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None: + data["output_config"] = {"effort": llm_config.effort} + + # Context management for Opus 4.5 to preserve thinking blocks and improve cache hits + # See: https://docs.anthropic.com/en/docs/build-with-claude/context-editing + if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: + data["context_management"] = { + "edits": [ + { + "type": "clear_thinking_20251015", + "keep": "all", # Preserve all thinking blocks for maximum cache performance + } + ] + } + # Tools # For an overview on tool choice: # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview @@ -541,6 +582,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass + # Opus 4.5 beta flags for effort and context management + # Note: effort beta is added if model is kevlar (actual effort value is in count_params) + # Context management beta is added for consistency with main requests + if model and model.startswith("claude-opus-4-5"): + # Add effort beta if output_config is present in count_params + if "output_config" in count_params: + betas.append("effort-2025-11-24") + # Add context management beta if thinking is enabled + if thinking_enabled: + betas.append("context-management-2025-06-27") + if betas: result = await client.beta.messages.count_tokens(**count_params, betas=betas) else: @@ -559,6 +611,8 @@ class AnthropicClient(LLMClientBase): or llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-opus-4") or llm_config.model.startswith("claude-haiku-4-5") + # Opus 4.5 support - to extend effort parameter to other models, modify this check + or llm_config.model.startswith("claude-opus-4-5") ) @trace_method diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 8e850860..937c3931 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -77,6 +77,10 @@ class LLMConfig(BaseModel): 0, description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.", ) + effort: Optional[Literal["low", "medium", "high"]] = Field( + None, + description="The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'.", + ) frequency_penalty: Optional[float] = Field( None, # Can also deafult to 0.0? description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.", @@ -172,6 +176,7 @@ class LLMConfig(BaseModel): or model.startswith("claude-sonnet-4") or model.startswith("claude-opus-4") or model.startswith("claude-haiku-4-5") + or model.startswith("claude-opus-4-5") ): values["put_inner_thoughts_in_kwargs"] = False @@ -372,6 +377,7 @@ class LLMConfig(BaseModel): or config.model.startswith("claude-sonnet-4") or config.model.startswith("claude-3-7-sonnet") or config.model.startswith("claude-haiku-4-5") + or config.model.startswith("claude-opus-4-5") ) @classmethod diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 7c0364b4..ba2d92b1 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -268,6 +268,12 @@ class AnthropicModelSettings(ModelSettings): description="Soft control for how verbose model output should be, used for GPT-5 models.", ) + # Opus 4.5 effort parameter + effort: Optional[Literal["low", "medium", "high"]] = Field( + None, + description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.", + ) + # TODO: implement support for these # top_k: Optional[int] = Field(None, description="The number of top tokens to return.") # top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.") @@ -280,6 +286,7 @@ class AnthropicModelSettings(ModelSettings): "thinking_budget_tokens": self.thinking.budget_tokens, "verbosity": self.verbosity, "parallel_tool_calls": self.parallel_tool_calls, + "effort": self.effort, } diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py index 8fb57cf4..6e1e4af7 100644 --- a/letta/schemas/providers/anthropic.py +++ b/letta/schemas/providers/anthropic.py @@ -93,6 +93,11 @@ MODEL_LIST = [ "name": "claude-3-5-haiku-latest", "context_window": 200000, }, + ## Opus 4.5 + { + "name": "claude-opus-4-5-20251101", + "context_window": 200000, + }, ] diff --git a/pyproject.toml b/pyproject.toml index 6e84d80e..01c327a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "grpcio-tools>=1.68.1", "llama-index>=0.12.2", "llama-index-embeddings-openai>=0.3.1", - "anthropic>=0.49.0", + "anthropic>=0.75.0", "letta-client>=0.1.319", "openai>=1.99.9", "opentelemetry-api==1.30.0", diff --git a/uv.lock b/uv.lock index 704b3fe4..ef678d3e 100644 --- a/uv.lock +++ b/uv.lock @@ -209,20 +209,21 @@ wheels = [ [[package]] name = "anthropic" -version = "0.64.0" +version = "0.75.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "distro" }, + { name = "docstring-parser" }, { name = "httpx" }, { name = "jiter" }, { name = "pydantic" }, { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/4f/f2b880cba1a76f3acc7d5eb2ae217632eac1b8cef5ed3027493545c59eba/anthropic-0.64.0.tar.gz", hash = "sha256:3d496c91a63dff64f451b3e8e4b238a9640bf87b0c11d0b74ddc372ba5a3fe58", size = 427893, upload-time = "2025-08-13T17:09:49.915Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565, upload-time = "2025-11-24T20:41:45.28Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/b2/2d268bcd5d6441df9dc0ebebc67107657edb8b0150d3fda1a5b81d1bec45/anthropic-0.64.0-py3-none-any.whl", hash = "sha256:6f5f7d913a6a95eb7f8e1bda4e75f76670e8acd8d4cd965e02e2a256b0429dd1", size = 297244, upload-time = "2025-08-13T17:09:47.908Z" }, + { url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164, upload-time = "2025-11-24T20:41:43.587Z" }, ] [[package]] @@ -2486,7 +2487,7 @@ requires-dist = [ { name = "aiosqlite", marker = "extra == 'desktop'", specifier = ">=0.21.0" }, { name = "aiosqlite", marker = "extra == 'sqlite'", specifier = ">=0.21.0" }, { name = "alembic", specifier = ">=1.13.3" }, - { name = "anthropic", specifier = ">=0.49.0" }, + { name = "anthropic", specifier = ">=0.75.0" }, { name = "apscheduler", specifier = ">=3.11.0" }, { name = "async-lru", marker = "extra == 'desktop'", specifier = ">=2.0.5" }, { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" },