From 5e8091a5a536721aa43bede5bec05f1612f58e14 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Tue, 2 Apr 2024 10:49:41 -0700 Subject: [PATCH] fix: skip empty string passages returned by llama index parsing (#1208) --- memgpt/data_sources/connectors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/memgpt/data_sources/connectors.py b/memgpt/data_sources/connectors.py index 658fe802..36115636 100644 --- a/memgpt/data_sources/connectors.py +++ b/memgpt/data_sources/connectors.py @@ -55,6 +55,16 @@ def load_data( # generate passages for passage_text, passage_metadata in connector.generate_passages([document], chunk_size=embedding_config.embedding_chunk_size): + + # for some reason, llama index parsers sometimes return empty strings + if len(passage_text) == 0: + typer.secho( + f"Warning: Llama index parser returned empty string, skipping insert of passage with metadata '{passage_metadata}' into VectorDB. You can usually ignore this warning.", + fg=typer.colors.YELLOW, + ) + continue + + # get embedding try: embedding = embed_model.get_text_embedding(passage_text) except Exception as e: