feat: Support offline memory in voice agent (#1347)

2025-04-28 15:03:02 -07:00
parent 684dd5a391
commit beed896d60
15 changed files with 917 additions and 286 deletions
--- a/letta/interfaces/openai_chat_completions_streaming_interface.py
+++ b/letta/interfaces/openai_chat_completions_streaming_interface.py
@@ -78,25 +78,29 @@ class OpenAIChatCompletionsStreamingInterface:
        """Parses and streams pre-execution messages if they have changed."""
        parsed_args = self.optimistic_json_parser.parse(self.tool_call_args_str)

-        if parsed_args.get(PRE_EXECUTION_MESSAGE_ARG) and self.current_parsed_json_result.get(PRE_EXECUTION_MESSAGE_ARG) != parsed_args.get(
+        if parsed_args.get(PRE_EXECUTION_MESSAGE_ARG) and parsed_args[PRE_EXECUTION_MESSAGE_ARG] != self.current_parsed_json_result.get(
            PRE_EXECUTION_MESSAGE_ARG
        ):
-            if parsed_args != self.current_parsed_json_result:
-                self.current_parsed_json_result = parsed_args
-                synthetic_chunk = ChatCompletionChunk(
+            # Extract old and new message content
+            old = self.current_parsed_json_result.get(PRE_EXECUTION_MESSAGE_ARG, "")
+            new = parsed_args[PRE_EXECUTION_MESSAGE_ARG]
+
+            # Compute the new content by slicing off the old prefix
+            content = new[len(old) :] if old else new
+
+            # Update current state
+            self.current_parsed_json_result = parsed_args
+
+            # Yield the formatted SSE chunk
+            yield _format_sse_chunk(
+                ChatCompletionChunk(
                    id=chunk.id,
                    object=chunk.object,
                    created=chunk.created,
                    model=chunk.model,
-                    choices=[
-                        Choice(
-                            index=0,
-                            delta=ChoiceDelta(content=tool_call.function.arguments, role="assistant"),
-                            finish_reason=None,
-                        )
-                    ],
+                    choices=[Choice(index=0, delta=ChoiceDelta(content=content, role="assistant"), finish_reason=None)],
                )
-                yield _format_sse_chunk(synthetic_chunk)
+            )

    def _handle_finish_reason(self, finish_reason: Optional[str]) -> bool:
        """Handles the finish reason and determines if streaming should stop."""