Revert "fix(core): add OpenAI prompt cache key and model-gated 24h retention" (#9502)

Revert "fix(core): add OpenAI prompt cache key and model-gated 24h retention"

This reverts commit f5bb9c629cb7d45544e90758cdfb899bcef41912.
This commit is contained in:
Ari Webb
2026-02-16 21:24:04 -08:00
committed by Caren Thomas
parent 2bf3314cef
commit fa70e09963
5 changed files with 3 additions and 444 deletions

View File

@@ -247,81 +247,6 @@ class OpenAIClient(LLMClientBase):
def supports_structured_output(self, llm_config: LLMConfig) -> bool:
    """Return True if the configured model supports structured output.

    Delegates to the module-level ``supports_structured_output`` helper,
    passing the given :class:`LLMConfig` through unchanged.
    """
    return supports_structured_output(llm_config)
def _is_openrouter_request(self, llm_config: LLMConfig) -> bool:
    """Return True when this request is routed through OpenRouter.

    A request counts as OpenRouter traffic when the endpoint URL points
    at openrouter.ai, or when the provider name is explicitly
    "openrouter".
    """
    endpoint = llm_config.model_endpoint
    if endpoint and "openrouter.ai" in endpoint:
        return True
    return llm_config.provider_name == "openrouter"
def _is_true_openai_request(self, llm_config: LLMConfig) -> bool:
    """Return True only for requests that target OpenAI's own API.

    Rules out OpenRouter traffic, the Letta inference endpoint, and any
    config whose ``provider_name`` names a different provider, so that
    OpenAI-specific request fields are never sent to compatible proxies.
    """
    # Non-OpenAI endpoint types and OpenRouter proxies never qualify.
    if llm_config.model_endpoint_type != "openai" or self._is_openrouter_request(llm_config):
        return False
    # Keep Letta inference endpoint behavior unchanged.
    if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
        return False
    # An explicitly-set provider other than "openai" means a proxy/compatible API;
    # don't apply OpenAI-specific prompt caching fields there.
    provider = llm_config.provider_name
    return not (provider and provider != "openai")
def _normalize_model_name(self, model: Optional[str]) -> Optional[str]:
if not model:
return None
return model.split("/", 1)[-1]
def _supports_extended_prompt_cache_retention(self, model: Optional[str]) -> bool:
normalized_model = self._normalize_model_name(model)
if not normalized_model:
return False
# Per OpenAI docs: extended retention is available on gpt-4.1 and gpt-5 family models but not gpt-5-mini or gpt-5.2-codex.
exceptions = ["gpt-5-mini", "gpt-5.2-codex"]
return normalized_model == "gpt-4.1" or normalized_model.startswith("gpt-5") and normalized_model not in exceptions
def _build_prompt_cache_key(self, messages: List[PydanticMessage]) -> Optional[str]:
    """Derive a stable OpenAI ``prompt_cache_key`` from the message history.

    Scans from the newest message backwards for the most recent non-empty
    ``agent_id`` and ``conversation_id``. Falls back to the client's
    telemetry agent id when the messages carry none, and to the literal
    "defaultconv" for non-conversation / default-conversation paths.
    Returns None when no agent id can be determined at all.
    """
    found_agent: Optional[str] = None
    found_conv: Optional[str] = None
    for msg in reversed(messages):
        if found_agent is None:
            # Treat missing/empty ids as absent.
            found_agent = getattr(msg, "agent_id", None) or None
        if found_conv is None:
            found_conv = getattr(msg, "conversation_id", None) or None
        if found_agent is not None and found_conv is not None:
            break
    if found_agent is None:
        found_agent = self._telemetry_agent_id
        if found_agent is None:
            return None
    # Use requested fallback string for non-conversation/default-conversation paths.
    if not found_conv or found_conv == "default":
        found_conv = "defaultconv"
    return f"letta:{found_agent}:{found_conv}"
def _apply_prompt_cache_settings(
    self,
    llm_config: LLMConfig,
    model: Optional[str],
    messages: List[PydanticMessage],
    request_obj: Any,
) -> None:
    """Attach OpenAI prompt-cache fields to an outgoing request, in place.

    No-op unless the request targets OpenAI's own API (see
    ``_is_true_openai_request``). Sets ``prompt_cache_key`` when one can
    be derived from *messages*, and opts eligible models into 24h
    prompt-cache retention.
    """
    if not self._is_true_openai_request(llm_config):
        return
    cache_key = self._build_prompt_cache_key(messages)
    if cache_key:
        request_obj.prompt_cache_key = cache_key
    if self._supports_extended_prompt_cache_retention(model):
        request_obj.prompt_cache_retention = "24h"
@trace_method
def build_request_data_responses(
self,
@@ -462,13 +387,6 @@ class OpenAIClient(LLMClientBase):
data.model = "memgpt-openai"
self._apply_prompt_cache_settings(
llm_config=llm_config,
model=model,
messages=messages,
request_obj=data,
)
request_data = data.model_dump(exclude_unset=True)
# print("responses request data", request_data)
return request_data
@@ -537,7 +455,9 @@ class OpenAIClient(LLMClientBase):
model = None
# TODO: we may need to extend this to more models using proxy?
is_openrouter = self._is_openrouter_request(llm_config)
is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
llm_config.provider_name == "openrouter"
)
if is_openrouter:
try:
model = llm_config.handle.split("/", 1)[-1]
@@ -640,13 +560,6 @@ class OpenAIClient(LLMClientBase):
new_tools.append(tool.model_copy(deep=True))
data.tools = new_tools
self._apply_prompt_cache_settings(
llm_config=llm_config,
model=model,
messages=messages,
request_obj=data,
)
# Note: Tools are already processed by enable_strict_mode() in the workflow/agent code
# (temporal_letta_v1_agent_workflow.py or letta_agent_v3.py) before reaching here.
# enable_strict_mode() handles: strict flag, additionalProperties, required array, nullable fields