diff --git a/letta/helpers/datetime_helpers.py b/letta/helpers/datetime_helpers.py index e99074a6..7ee4aa40 100644 --- a/letta/helpers/datetime_helpers.py +++ b/letta/helpers/datetime_helpers.py @@ -66,6 +66,15 @@ def get_utc_time() -> datetime: return datetime.now(timezone.utc) +def get_utc_time_int() -> int: + return int(get_utc_time().timestamp()) + + +def timestamp_to_datetime(timestamp_seconds: int) -> datetime: + """Convert a Unix timestamp (seconds since the epoch) to a timezone-aware UTC datetime.""" + return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc) + + def format_datetime(dt): return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z") diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py index 2f6bd296..59939e4d 100644 --- a/letta/llm_api/anthropic.py +++ b/letta/llm_api/anthropic.py @@ -20,7 +20,7 @@ from anthropic.types.beta import ( ) from letta.errors import BedrockError, BedrockPermissionError -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime from letta.llm_api.aws_bedrock import get_bedrock_client from letta.llm_api.helpers import add_inner_thoughts_to_functions from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION @@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion( return ChatCompletionResponse( id=response.id, choices=[choice], - created=get_utc_time(), + created=get_utc_time_int(), model=response.model, usage=UsageStatistics( prompt_tokens=prompt_tokens, @@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion( 'logprobs': None } ], - 'created': datetime.datetime(2025, 1, 24, 0, 18, 55, tzinfo=TzInfo(UTC)), + 'created': 1713216662, 'model': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bd83329f63', 'object': 'chat.completion.chunk' @@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion( return ChatCompletionChunkResponse( id=message_id, choices=[choice], - 
created=get_utc_time(), + created=get_utc_time_int(), model=model, output_tokens=completion_chunk_tokens, ) @@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream( chat_completion_response = ChatCompletionResponse( id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID, choices=[], - created=dummy_message.created_at, + created=int(dummy_message.created_at.timestamp()), model=chat_completion_request.model, usage=UsageStatistics( prompt_tokens=prompt_tokens, @@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream( message_type = stream_interface.process_chunk( chat_completion_chunk, message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id, - message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created, + message_date=( + timestamp_to_datetime(chat_completion_response.created) + if create_message_datetime + else timestamp_to_datetime(chat_completion_chunk.created) + ), # if extended_thinking is on, then reasoning_content will be flowing as chunks # TODO handle emitting redacted reasoning content (e.g. as concat?) 
expect_reasoning_content=extended_thinking, diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index cd9c0815..4c79cb68 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -22,7 +22,7 @@ from letta.errors import ( LLMServerError, LLMUnprocessableEntityError, ) -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs from letta.llm_api.llm_client_base import LLMClientBase from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION @@ -403,7 +403,7 @@ class AnthropicClient(LLMClientBase): chat_completion_response = ChatCompletionResponse( id=response.id, choices=[choice], - created=get_utc_time(), + created=get_utc_time_int(), model=response.model, usage=UsageStatistics( prompt_tokens=prompt_tokens, diff --git a/letta/llm_api/cohere.py b/letta/llm_api/cohere.py index 640e0c09..4a30d796 100644 --- a/letta/llm_api/cohere.py +++ b/letta/llm_api/cohere.py @@ -4,7 +4,7 @@ from typing import List, Optional, Union import requests -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.json_helpers import json_dumps from letta.local_llm.utils import count_tokens from letta.schemas.message import Message @@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion( return ChatCompletionResponse( id=response_json["response_id"], choices=[choice], - created=get_utc_time(), + created=get_utc_time_int(), model=model, usage=UsageStatistics( prompt_tokens=prompt_tokens, diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py index 72e56139..e471cb85 100644 --- a/letta/llm_api/google_ai_client.py +++ b/letta/llm_api/google_ai_client.py @@ -6,7 +6,7 @@ import requests from google.genai.types import 
FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig from letta.constants import NON_USER_MSG_PREFIX -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.json_helpers import json_dumps from letta.llm_api.helpers import make_post_request from letta.llm_api.llm_client_base import LLMClientBase @@ -260,7 +260,7 @@ class GoogleAIClient(LLMClientBase): id=response_id, choices=choices, model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response - created=get_utc_time(), + created=get_utc_time_int(), usage=usage, ) except KeyError as e: diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 3bd4eb95..f22faeba 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -4,7 +4,7 @@ from typing import List, Optional from google import genai from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ToolConfig -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.json_helpers import json_dumps from letta.llm_api.google_ai_client import GoogleAIClient from letta.local_llm.json_parser import clean_json_string_extra_backslash @@ -225,7 +225,7 @@ class GoogleVertexClient(GoogleAIClient): id=response_id, choices=choices, model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response - created=get_utc_time(), + created=get_utc_time_int(), usage=usage, ) except KeyError as e: diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index 428d7f11..eda4c9a8 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -4,6 +4,7 @@ from typing import Generator, List, Optional, Union import requests from openai import OpenAI +from letta.helpers.datetime_helpers import timestamp_to_datetime from letta.llm_api.helpers import 
add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST @@ -238,7 +239,7 @@ def openai_chat_completions_process_stream( chat_completion_response = ChatCompletionResponse( id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID, choices=[], - created=dummy_message.created_at, # NOTE: doesn't matter since both will do get_utc_time() + created=int(dummy_message.created_at.timestamp()), # NOTE: whole-second Unix timestamp; exact value doesn't matter since both will do get_utc_time() model=chat_completion_request.model, usage=UsageStatistics( completion_tokens=0, @@ -275,7 +276,11 @@ def openai_chat_completions_process_stream( message_type = stream_interface.process_chunk( chat_completion_chunk, message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id, - message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created, + message_date=( + timestamp_to_datetime(chat_completion_response.created) + if create_message_datetime + else timestamp_to_datetime(chat_completion_chunk.created) + ), expect_reasoning_content=expect_reasoning_content, name=name, message_index=message_idx, diff --git a/letta/local_llm/chat_completion_proxy.py b/letta/local_llm/chat_completion_proxy.py index 4abc01ee..35db97ed 100644 --- a/letta/local_llm/chat_completion_proxy.py +++ b/letta/local_llm/chat_completion_proxy.py @@ -6,7 +6,7 @@ import requests from letta.constants import CLI_WARNING_PREFIX from letta.errors import LocalLLMConnectionError, LocalLLMError -from letta.helpers.datetime_helpers import get_utc_time +from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.json_helpers import json_dumps from letta.local_llm.constants import DEFAULT_WRAPPER from 
letta.local_llm.function_parser import patch_function @@ -241,7 +241,7 @@ def get_chat_completion( ), ) ], - created=get_utc_time(), + created=get_utc_time_int(), model=model, # "This fingerprint represents the backend configuration that the model runs with." # system_fingerprint=user if user is not None else "null", diff --git a/letta/schemas/openai/chat_completion_response.py b/letta/schemas/openai/chat_completion_response.py index c217f973..d4332b22 100644 --- a/letta/schemas/openai/chat_completion_response.py +++ b/letta/schemas/openai/chat_completion_response.py @@ -119,7 +119,7 @@ class ChatCompletionResponse(BaseModel): id: str choices: List[Choice] - created: datetime.datetime + created: Union[datetime.datetime, int] model: Optional[str] = None # NOTE: this is not consistent with OpenAI API standard, however is necessary to support local LLMs # system_fingerprint: str # docs say this is mandatory, but in reality API returns None system_fingerprint: Optional[str] = None @@ -187,7 +187,7 @@ class ChatCompletionChunkResponse(BaseModel): id: str choices: List[ChunkChoice] - created: Union[datetime.datetime, str] + created: Union[datetime.datetime, int] model: str # system_fingerprint: str # docs say this is mandatory, but in reality API returns None system_fingerprint: Optional[str] = None diff --git a/letta/server/rest_api/chat_completions_interface.py b/letta/server/rest_api/chat_completions_interface.py index 77550a52..0f684ed7 100644 --- a/letta/server/rest_api/chat_completions_interface.py +++ b/letta/server/rest_api/chat_completions_interface.py @@ -238,7 +238,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface): return ChatCompletionChunk( id=chunk.id, object=chunk.object, - created=chunk.created.timestamp(), + created=chunk.created, model=chunk.model, choices=[ Choice( @@ -256,7 +256,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface): return ChatCompletionChunk( id=chunk.id, object=chunk.object, - 
created=chunk.created.timestamp(), + created=chunk.created, model=chunk.model, choices=[ Choice( diff --git a/letta/server/rest_api/interface.py b/letta/server/rest_api/interface.py index 469ff0a2..edf8a233 100644 --- a/letta/server/rest_api/interface.py +++ b/letta/server/rest_api/interface.py @@ -1001,7 +1001,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): # Example case that would trigger here: # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ' # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)] - # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC)) + # created=1713216662 # model='gpt-4o-mini-2024-07-18' # object='chat.completion.chunk' warnings.warn(f"Couldn't find delta in chunk: {chunk}")