From e428e3666b8c8a0b9b38335e18e44a2266a7ac85 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sat, 21 Oct 2023 14:18:43 -0700
Subject: [PATCH] Revert "Merge pull request #71 from cpacker/pdf-support"

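This reverts commit 1d566258642268084bf5029cb76c778cf19b9281, reversing
changes made to b30c7836f011b3d2c336b5dd82c1136134ac58df.

Removes PDF ingestion support: the `fitz` import, `read_pdf_in_chunks`,
and the `.pdf` branch of `chunk_file` in memgpt/utils.py, plus the
`pymupdf` entry in requirements.txt.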
---
 memgpt/utils.py  | 13 +------------
 requirements.txt |  1 -
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/memgpt/utils.py b/memgpt/utils.py
index d8308983..77658228 100644
--- a/memgpt/utils.py
+++ b/memgpt/utils.py
@@ -11,7 +11,6 @@ import faiss
 import tiktoken
 import glob
 import sqlite3
-import fitz
 from tqdm import tqdm
 from memgpt.openai_tools import async_get_embedding_with_backoff
 
@@ -99,12 +98,6 @@ def read_in_chunks(file_object, chunk_size):
             break
         yield data
 
-def read_pdf_in_chunks(file, chunk_size):
-    doc = fitz.open(file)
-    for page in doc:
-        text = page.get_text()
-        yield text
-
 def read_in_rows_csv(file_object, chunk_size):
     csvreader = csv.reader(file_object)
     header = next(csvreader)
@@ -130,11 +123,7 @@ def total_bytes(pattern):
 def chunk_file(file, tkns_per_chunk=300, model='gpt-4'):
     encoding = tiktoken.encoding_for_model(model)
     with open(file, 'r') as f:
-        if file.endswith('.pdf'):
-            lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk*8)]
-            if len(lines) == 0:
-                print(f"Warning: {file} did not have any extractable text.")
-        elif file.endswith('.csv'):
+        if file.endswith('.csv'):
             lines = [l for l in read_in_rows_csv(f, tkns_per_chunk*8)]
         else:
             lines = [l for l in read_in_chunks(f, tkns_per_chunk*4)]
diff --git a/requirements.txt b/requirements.txt
index 484bc1e2..6258b856 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,6 @@ geopy
 numpy
 openai
 pybars3
-pymupdf
 python-dotenv
 pytz
 rich