Revert "fix(core): add OpenAI prompt cache key and model-gated 24h retention" (#9502)

Revert "fix(core): add OpenAI prompt cache key and model-gated 24h retention"

This reverts commit f5bb9c629cb7d45544e90758cdfb899bcef41912.
This commit is contained in:
Ari Webb
2026-02-16 21:24:04 -08:00
committed by Caren Thomas
parent 2bf3314cef
commit fa70e09963
5 changed files with 3 additions and 444 deletions

View File

@@ -247,81 +247,6 @@ class OpenAIClient(LLMClientBase):
def supports_structured_output(self, llm_config: LLMConfig) -> bool:
    """Return True if the configured model supports structured output.

    Delegates to the module-level ``supports_structured_output`` helper,
    passing the given :class:`LLMConfig` through unchanged.
    """
    return supports_structured_output(llm_config)
def _is_openrouter_request(self, llm_config: LLMConfig) -> bool:
    """Return True when this request is routed through OpenRouter.

    A request counts as OpenRouter traffic when the endpoint URL points
    at openrouter.ai, or when the provider name is explicitly
    "openrouter".
    """
    endpoint = llm_config.model_endpoint
    if endpoint and "openrouter.ai" in endpoint:
        return True
    return llm_config.provider_name == "openrouter"
def _is_true_openai_request(self, llm_config: LLMConfig) -> bool:
    """Return True only for requests that target OpenAI's own API.

    Rules out OpenRouter traffic, the Letta inference endpoint, and any
    config whose ``provider_name`` names a different provider, so that
    OpenAI-specific request fields are never sent to compatible proxies.
    """
    # Non-OpenAI endpoint types and OpenRouter proxies never qualify.
    if llm_config.model_endpoint_type != "openai" or self._is_openrouter_request(llm_config):
        return False
    # Keep Letta inference endpoint behavior unchanged.
    if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
        return False
    # An explicitly-set provider other than "openai" means a proxy/compatible API;
    # don't apply OpenAI-specific prompt caching fields there.
    provider = llm_config.provider_name
    return not (provider and provider != "openai")
def _normalize_model_name(self, model: Optional[str]) -> Optional[str]:
if not model:
return None
return model.split("/", 1)[-1]
def _supports_extended_prompt_cache_retention(self, model: Optional[str]) -> bool:
normalized_model = self._normalize_model_name(model)
if not normalized_model:
return False
# Per OpenAI docs: extended retention is available on gpt-4.1 and gpt-5 family models but not gpt-5-mini or gpt-5.2-codex.
exceptions = ["gpt-5-mini", "gpt-5.2-codex"]
return normalized_model == "gpt-4.1" or normalized_model.startswith("gpt-5") and normalized_model not in exceptions
def _build_prompt_cache_key(self, messages: List[PydanticMessage]) -> Optional[str]:
    """Derive a stable OpenAI ``prompt_cache_key`` from the message history.

    Scans from the newest message backwards for the most recent non-empty
    ``agent_id`` and ``conversation_id``. Falls back to the client's
    telemetry agent id when the messages carry none, and to the literal
    "defaultconv" for non-conversation / default-conversation paths.
    Returns None when no agent id can be determined at all.
    """
    found_agent: Optional[str] = None
    found_conv: Optional[str] = None
    for msg in reversed(messages):
        if found_agent is None:
            # Treat missing/empty ids as absent.
            found_agent = getattr(msg, "agent_id", None) or None
        if found_conv is None:
            found_conv = getattr(msg, "conversation_id", None) or None
        if found_agent is not None and found_conv is not None:
            break
    if found_agent is None:
        found_agent = self._telemetry_agent_id
        if found_agent is None:
            return None
    # Use requested fallback string for non-conversation/default-conversation paths.
    if not found_conv or found_conv == "default":
        found_conv = "defaultconv"
    return f"letta:{found_agent}:{found_conv}"
def _apply_prompt_cache_settings(
    self,
    llm_config: LLMConfig,
    model: Optional[str],
    messages: List[PydanticMessage],
    request_obj: Any,
) -> None:
    """Attach OpenAI prompt-cache fields to an outgoing request, in place.

    No-op unless the request targets OpenAI's own API (see
    ``_is_true_openai_request``). Sets ``prompt_cache_key`` when one can
    be derived from *messages*, and opts eligible models into 24h
    prompt-cache retention.
    """
    if not self._is_true_openai_request(llm_config):
        return
    cache_key = self._build_prompt_cache_key(messages)
    if cache_key:
        request_obj.prompt_cache_key = cache_key
    if self._supports_extended_prompt_cache_retention(model):
        request_obj.prompt_cache_retention = "24h"
@trace_method
def build_request_data_responses(
self,
@@ -462,13 +387,6 @@ class OpenAIClient(LLMClientBase):
data.model = "memgpt-openai"
self._apply_prompt_cache_settings(
llm_config=llm_config,
model=model,
messages=messages,
request_obj=data,
)
request_data = data.model_dump(exclude_unset=True)
# print("responses request data", request_data)
return request_data
@@ -537,7 +455,9 @@ class OpenAIClient(LLMClientBase):
model = None
# TODO: we may need to extend this to more models using proxy?
is_openrouter = self._is_openrouter_request(llm_config)
is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
llm_config.provider_name == "openrouter"
)
if is_openrouter:
try:
model = llm_config.handle.split("/", 1)[-1]
@@ -640,13 +560,6 @@ class OpenAIClient(LLMClientBase):
new_tools.append(tool.model_copy(deep=True))
data.tools = new_tools
self._apply_prompt_cache_settings(
llm_config=llm_config,
model=model,
messages=messages,
request_obj=data,
)
# Note: Tools are already processed by enable_strict_mode() in the workflow/agent code
# (temporal_letta_v1_agent_workflow.py or letta_agent_v3.py) before reaching here.
# enable_strict_mode() handles: strict flag, additionalProperties, required array, nullable fields