feat: add support for opus 4.5 (#6256)
* feat: add support for new model
* fix: just stage-api && just publish-api (anthropic model settings changed)
* fix: just stage-api && just publish-api (anthropic model settings changed)
* fix: make kevlar have default reasoning on
* fix: bump anthropic sdk version
* fix: patch name
* pin newer version anthropic

---------

Co-authored-by: Ari Webb <ari@letta.com>
This commit is contained in:
committed by
Caren Thomas
parent
30dab0abb9
commit
c4699b3d17
@@ -19789,6 +19789,19 @@
|
|||||||
],
|
],
|
||||||
"title": "Verbosity",
|
"title": "Verbosity",
|
||||||
"description": "Soft control for how verbose model output should be, used for GPT-5 models."
|
"description": "Soft control for how verbose model output should be, used for GPT-5 models."
|
||||||
|
},
|
||||||
|
"effort": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["low", "medium", "high"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Effort",
|
||||||
|
"description": "Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -28905,6 +28918,19 @@
|
|||||||
"description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
|
"description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
|
||||||
"default": 0
|
"default": 0
|
||||||
},
|
},
|
||||||
|
"effort": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["low", "medium", "high"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Effort",
|
||||||
|
"description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'."
|
||||||
|
},
|
||||||
"frequency_penalty": {
|
"frequency_penalty": {
|
||||||
"anyOf": [
|
"anyOf": [
|
||||||
{
|
{
|
||||||
@@ -31069,6 +31095,19 @@
|
|||||||
"default": 0,
|
"default": 0,
|
||||||
"deprecated": true
|
"deprecated": true
|
||||||
},
|
},
|
||||||
|
"effort": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["low", "medium", "high"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Effort",
|
||||||
|
"description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'."
|
||||||
|
},
|
||||||
"frequency_penalty": {
|
"frequency_penalty": {
|
||||||
"anyOf": [
|
"anyOf": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -72,6 +72,14 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Opus 4.5 effort parameter - to extend to other models, modify the model check
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
|
||||||
|
betas.append("effort-2025-11-24")
|
||||||
|
|
||||||
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||||
|
betas.append("context-management-2025-06-27")
|
||||||
|
|
||||||
if betas:
|
if betas:
|
||||||
response = client.beta.messages.create(**request_data, betas=betas)
|
response = client.beta.messages.create(**request_data, betas=betas)
|
||||||
else:
|
else:
|
||||||
@@ -98,6 +106,14 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Opus 4.5 effort parameter - to extend to other models, modify the model check
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
|
||||||
|
betas.append("effort-2025-11-24")
|
||||||
|
|
||||||
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||||
|
betas.append("context-management-2025-06-27")
|
||||||
|
|
||||||
if betas:
|
if betas:
|
||||||
response = await client.beta.messages.create(**request_data, betas=betas)
|
response = await client.beta.messages.create(**request_data, betas=betas)
|
||||||
else:
|
else:
|
||||||
@@ -131,6 +147,14 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Opus 4.5 effort parameter - to extend to other models, modify the model check
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
|
||||||
|
betas.append("effort-2025-11-24")
|
||||||
|
|
||||||
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||||
|
betas.append("context-management-2025-06-27")
|
||||||
|
|
||||||
return await client.beta.messages.create(**request_data, betas=betas)
|
return await client.beta.messages.create(**request_data, betas=betas)
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
@@ -271,6 +295,23 @@ class AnthropicClient(LLMClientBase):
|
|||||||
# Silently disable prefix_fill for now
|
# Silently disable prefix_fill for now
|
||||||
prefix_fill = False
|
prefix_fill = False
|
||||||
|
|
||||||
|
# Effort configuration for Opus 4.5 (controls token spending)
|
||||||
|
# To extend to other models, modify the model check
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
|
||||||
|
data["output_config"] = {"effort": llm_config.effort}
|
||||||
|
|
||||||
|
# Context management for Opus 4.5 to preserve thinking blocks and improve cache hits
|
||||||
|
# See: https://docs.anthropic.com/en/docs/build-with-claude/context-editing
|
||||||
|
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||||
|
data["context_management"] = {
|
||||||
|
"edits": [
|
||||||
|
{
|
||||||
|
"type": "clear_thinking_20251015",
|
||||||
|
"keep": "all", # Preserve all thinking blocks for maximum cache performance
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
# Tools
|
# Tools
|
||||||
# For an overview on tool choice:
|
# For an overview on tool choice:
|
||||||
# https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview
|
# https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview
|
||||||
@@ -541,6 +582,17 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Opus 4.5 beta flags for effort and context management
|
||||||
|
# Note: effort beta is added if the model is Opus 4.5 (actual effort value is in count_params)
|
||||||
|
# Context management beta is added for consistency with main requests
|
||||||
|
if model and model.startswith("claude-opus-4-5"):
|
||||||
|
# Add effort beta if output_config is present in count_params
|
||||||
|
if "output_config" in count_params:
|
||||||
|
betas.append("effort-2025-11-24")
|
||||||
|
# Add context management beta if thinking is enabled
|
||||||
|
if thinking_enabled:
|
||||||
|
betas.append("context-management-2025-06-27")
|
||||||
|
|
||||||
if betas:
|
if betas:
|
||||||
result = await client.beta.messages.count_tokens(**count_params, betas=betas)
|
result = await client.beta.messages.count_tokens(**count_params, betas=betas)
|
||||||
else:
|
else:
|
||||||
@@ -559,6 +611,8 @@ class AnthropicClient(LLMClientBase):
|
|||||||
or llm_config.model.startswith("claude-sonnet-4")
|
or llm_config.model.startswith("claude-sonnet-4")
|
||||||
or llm_config.model.startswith("claude-opus-4")
|
or llm_config.model.startswith("claude-opus-4")
|
||||||
or llm_config.model.startswith("claude-haiku-4-5")
|
or llm_config.model.startswith("claude-haiku-4-5")
|
||||||
|
# Opus 4.5 support - to extend effort parameter to other models, modify this check
|
||||||
|
or llm_config.model.startswith("claude-opus-4-5")
|
||||||
)
|
)
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
|
|||||||
@@ -77,6 +77,10 @@ class LLMConfig(BaseModel):
|
|||||||
0,
|
0,
|
||||||
description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
|
description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
|
||||||
)
|
)
|
||||||
|
effort: Optional[Literal["low", "medium", "high"]] = Field(
|
||||||
|
None,
|
||||||
|
description="The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'.",
|
||||||
|
)
|
||||||
frequency_penalty: Optional[float] = Field(
|
frequency_penalty: Optional[float] = Field(
|
||||||
None, # Can also default to 0.0?
|
None, # Can also default to 0.0?
|
||||||
description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
|
description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
|
||||||
@@ -172,6 +176,7 @@ class LLMConfig(BaseModel):
|
|||||||
or model.startswith("claude-sonnet-4")
|
or model.startswith("claude-sonnet-4")
|
||||||
or model.startswith("claude-opus-4")
|
or model.startswith("claude-opus-4")
|
||||||
or model.startswith("claude-haiku-4-5")
|
or model.startswith("claude-haiku-4-5")
|
||||||
|
or model.startswith("claude-opus-4-5")
|
||||||
):
|
):
|
||||||
values["put_inner_thoughts_in_kwargs"] = False
|
values["put_inner_thoughts_in_kwargs"] = False
|
||||||
|
|
||||||
@@ -372,6 +377,7 @@ class LLMConfig(BaseModel):
|
|||||||
or config.model.startswith("claude-sonnet-4")
|
or config.model.startswith("claude-sonnet-4")
|
||||||
or config.model.startswith("claude-3-7-sonnet")
|
or config.model.startswith("claude-3-7-sonnet")
|
||||||
or config.model.startswith("claude-haiku-4-5")
|
or config.model.startswith("claude-haiku-4-5")
|
||||||
|
or config.model.startswith("claude-opus-4-5")
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -268,6 +268,12 @@ class AnthropicModelSettings(ModelSettings):
|
|||||||
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Opus 4.5 effort parameter
|
||||||
|
effort: Optional[Literal["low", "medium", "high"]] = Field(
|
||||||
|
None,
|
||||||
|
description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: implement support for these
|
# TODO: implement support for these
|
||||||
# top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
|
# top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
|
||||||
# top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")
|
# top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")
|
||||||
@@ -280,6 +286,7 @@ class AnthropicModelSettings(ModelSettings):
|
|||||||
"thinking_budget_tokens": self.thinking.budget_tokens,
|
"thinking_budget_tokens": self.thinking.budget_tokens,
|
||||||
"verbosity": self.verbosity,
|
"verbosity": self.verbosity,
|
||||||
"parallel_tool_calls": self.parallel_tool_calls,
|
"parallel_tool_calls": self.parallel_tool_calls,
|
||||||
|
"effort": self.effort,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -93,6 +93,11 @@ MODEL_LIST = [
|
|||||||
"name": "claude-3-5-haiku-latest",
|
"name": "claude-3-5-haiku-latest",
|
||||||
"context_window": 200000,
|
"context_window": 200000,
|
||||||
},
|
},
|
||||||
|
## Opus 4.5
|
||||||
|
{
|
||||||
|
"name": "claude-opus-4-5-20251101",
|
||||||
|
"context_window": 200000,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ dependencies = [
|
|||||||
"grpcio-tools>=1.68.1",
|
"grpcio-tools>=1.68.1",
|
||||||
"llama-index>=0.12.2",
|
"llama-index>=0.12.2",
|
||||||
"llama-index-embeddings-openai>=0.3.1",
|
"llama-index-embeddings-openai>=0.3.1",
|
||||||
"anthropic>=0.49.0",
|
"anthropic>=0.75.0",
|
||||||
"letta-client>=0.1.319",
|
"letta-client>=0.1.319",
|
||||||
"openai>=1.99.9",
|
"openai>=1.99.9",
|
||||||
"opentelemetry-api==1.30.0",
|
"opentelemetry-api==1.30.0",
|
||||||
|
|||||||
9
uv.lock
generated
9
uv.lock
generated
@@ -209,20 +209,21 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anthropic"
|
name = "anthropic"
|
||||||
version = "0.64.0"
|
version = "0.75.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "anyio" },
|
{ name = "anyio" },
|
||||||
{ name = "distro" },
|
{ name = "distro" },
|
||||||
|
{ name = "docstring-parser" },
|
||||||
{ name = "httpx" },
|
{ name = "httpx" },
|
||||||
{ name = "jiter" },
|
{ name = "jiter" },
|
||||||
{ name = "pydantic" },
|
{ name = "pydantic" },
|
||||||
{ name = "sniffio" },
|
{ name = "sniffio" },
|
||||||
{ name = "typing-extensions" },
|
{ name = "typing-extensions" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/4f/f2b880cba1a76f3acc7d5eb2ae217632eac1b8cef5ed3027493545c59eba/anthropic-0.64.0.tar.gz", hash = "sha256:3d496c91a63dff64f451b3e8e4b238a9640bf87b0c11d0b74ddc372ba5a3fe58", size = 427893, upload-time = "2025-08-13T17:09:49.915Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565, upload-time = "2025-11-24T20:41:45.28Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/a9/b2/2d268bcd5d6441df9dc0ebebc67107657edb8b0150d3fda1a5b81d1bec45/anthropic-0.64.0-py3-none-any.whl", hash = "sha256:6f5f7d913a6a95eb7f8e1bda4e75f76670e8acd8d4cd965e02e2a256b0429dd1", size = 297244, upload-time = "2025-08-13T17:09:47.908Z" },
|
{ url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164, upload-time = "2025-11-24T20:41:43.587Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2486,7 +2487,7 @@ requires-dist = [
|
|||||||
{ name = "aiosqlite", marker = "extra == 'desktop'", specifier = ">=0.21.0" },
|
{ name = "aiosqlite", marker = "extra == 'desktop'", specifier = ">=0.21.0" },
|
||||||
{ name = "aiosqlite", marker = "extra == 'sqlite'", specifier = ">=0.21.0" },
|
{ name = "aiosqlite", marker = "extra == 'sqlite'", specifier = ">=0.21.0" },
|
||||||
{ name = "alembic", specifier = ">=1.13.3" },
|
{ name = "alembic", specifier = ">=1.13.3" },
|
||||||
{ name = "anthropic", specifier = ">=0.49.0" },
|
{ name = "anthropic", specifier = ">=0.75.0" },
|
||||||
{ name = "apscheduler", specifier = ">=3.11.0" },
|
{ name = "apscheduler", specifier = ">=3.11.0" },
|
||||||
{ name = "async-lru", marker = "extra == 'desktop'", specifier = ">=2.0.5" },
|
{ name = "async-lru", marker = "extra == 'desktop'", specifier = ">=2.0.5" },
|
||||||
{ name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" },
|
{ name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" },
|
||||||
|
|||||||
Reference in New Issue
Block a user