fix: Fix duplicate file blocks (#3105)

This commit is contained in:
Matthew Zhou
2025-06-30 11:35:03 -07:00
committed by GitHub
parent a6b991df93
commit 499bdcbf5f
3 changed files with 34 additions and 4 deletions

View File

@@ -1,7 +1,8 @@
import logging
from typing import TYPE_CHECKING, List, Optional
from jinja2 import Template, TemplateSyntaxError
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
# Forward referencing to avoid circular import with Agent -> Memory -> Agent
if TYPE_CHECKING:
@@ -69,6 +70,30 @@ class Memory(BaseModel, validate_assignment=True):
default_factory=list, description="Blocks representing the agent's in-context memory of an attached file"
)
@field_validator("file_blocks")
@classmethod
def validate_file_blocks_no_duplicates(cls, v: List[Block]) -> List[Block]:
    """Drop file blocks whose label repeats an earlier one, keeping the first occurrence.

    Args:
        v: Candidate value for the ``file_blocks`` field.

    Returns:
        ``v`` itself when it is empty; otherwise a new list containing the
        first block encountered for each label, in the original order.

    When at least one block is dropped, a warning listing each repeated
    label (once per dropped block) is logged.
    """
    if not v:
        return v

    # Dicts preserve insertion order, so the surviving blocks come back in
    # the same order they appeared in the input.
    first_by_label = {}
    repeated = []
    for candidate in v:
        label = candidate.label
        if label in first_by_label:
            repeated.append(label)
            continue
        first_by_label[label] = candidate

    if repeated:
        logging.getLogger(__name__).warning(
            f"Duplicate block labels found in file_blocks: {repeated}. Removing duplicates."
        )

    return list(first_by_label.values())
# Memory.prompt_template is a Jinja2 template for compiling the memory module into a prompt string.
prompt_template: str = Field(
default="{% for block in blocks %}"

View File

@@ -1730,7 +1730,9 @@ class AgentManager:
agent_state.memory.blocks = [b for b in blocks if b is not None]
if file_block_names:
file_blocks = await self.file_agent_manager.get_all_file_blocks_by_name(file_names=file_block_names, actor=actor)
file_blocks = await self.file_agent_manager.get_all_file_blocks_by_name(
file_names=file_block_names, agent_id=agent_state.id, actor=actor
)
agent_state.memory.file_blocks = [b for b in file_blocks if b is not None]
return agent_state

View File

@@ -165,17 +165,19 @@ class FileAgentManager:
self,
*,
file_names: List[str],
agent_id: str,
actor: PydanticUser,
) -> List[PydanticBlock]:
"""
Retrieve multiple FileAgent associations by their IDs in a single query.
Retrieve multiple FileAgent associations by their file names for a specific agent.
Args:
file_names: List of file names to retrieve
agent_id: ID of the agent to retrieve file blocks for
actor: The user making the request
Returns:
List of PydanticFileAgent objects found (may be fewer than requested if some IDs don't exist)
List of PydanticBlock objects found (may be fewer than requested if some file names don't exist)
"""
if not file_names:
return []
@@ -185,6 +187,7 @@ class FileAgentManager:
query = select(FileAgentModel).where(
and_(
FileAgentModel.file_name.in_(file_names),
FileAgentModel.agent_id == agent_id,
FileAgentModel.organization_id == actor.organization_id,
)
)