From 4bee883bf55838621d6cb472850f4f5d398bb5bd Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Fri, 1 Dec 2023 17:45:25 -0800 Subject: [PATCH] Upgrade to `llama_index=0.9.10` (#556) * minor fix * forgot to add embedding file * upgrade llama index --- poetry.lock | 186 ++++++++++++++++++------------------------------- pyproject.toml | 2 +- 2 files changed, 67 insertions(+), 121 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4aea385f..c6ff3f56 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -196,6 +196,24 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "23.11.0" @@ -530,6 +548,17 @@ files = [ {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, ] +[[package]] +name = "distro" +version = "1.8.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.8.0-py3-none-any.whl", hash = "sha256:99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff"}, + {file = "distro-1.8.0.tar.gz", hash = "sha256:02e111d1dc6a50abb8eed6bf31c3e48ed8b0830d1ea2a1b78c61765c2513fdd8"}, +] + [[package]] name = "docstring-parser" version = "0.15" @@ -903,31 +932,6 @@ files = [ {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, ] -[[package]] -name = "jsonpatch" -version = "1.33" -description = "Apply JSON-Patches (RFC 6902)" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" -files = [ - {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, - {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, -] - -[package.dependencies] -jsonpointer = ">=1.9" - -[[package]] -name = "jsonpointer" -version = "2.4" -description = "Identify specific nodes in a JSON document (RFC 6901)" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" -files = [ - {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, -] - [[package]] name = "lancedb" version = "0.3.4" @@ -962,112 +966,42 @@ docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] embeddings = ["InstructorEmbedding", "cohere", "open-clip-torch", "openai", "pillow", "sentence-transformers", "torch"] tests = ["pandas (>=1.4)", "pytest", "pytest-asyncio", "pytest-mock", "requests"] -[[package]] -name = "langchain" -version = "0.0.343" -description = "Building applications with LLMs through composability" -optional = false -python-versions = ">=3.8.1,<4.0" -files = [ - {file = "langchain-0.0.343-py3-none-any.whl", hash = "sha256:1959336b6076066bf233dd99dce44be2e9adccb53d799bff92c653098178b347"}, - {file = "langchain-0.0.343.tar.gz", hash = "sha256:166924d771a463009277f688f6dfc829a3af2d9cd5b41a64a7a6bd7860280e81"}, -] - -[package.dependencies] -aiohttp = ">=3.8.3,<4.0.0" -anyio = "<4.0" -async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} -dataclasses-json = ">=0.5.7,<0.7" -jsonpatch = ">=1.33,<2.0" -langchain-core = ">=0.0.7,<0.1" -langsmith = ">=0.0.63,<0.1.0" -numpy = ">=1,<2" -pydantic = ">=1,<3" -PyYAML = ">=5.3" -requests = ">=2,<3" -SQLAlchemy = ">=1.4,<3" -tenacity = ">=8.1.0,<9.0.0" - -[package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] -azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"] -clarifai = ["clarifai (>=9.1.0)"] -cli = ["typer (>=0.9.0,<0.10.0)"] -cohere = ["cohere (>=4,<5)"] -docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] -embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] -javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] -openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"] -qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] -text-helpers = ["chardet (>=5.1.0,<6.0.0)"] - -[[package]] -name = "langchain-core" -version = "0.0.7" -description = "Building applications with LLMs through composability" -optional = false -python-versions = ">=3.8.1,<4.0" -files = [ - {file = "langchain_core-0.0.7-py3-none-any.whl", hash = "sha256:368ae70a1da56971642df0a9ede5f480d762224238ba84d0f9b2cd7c776150de"}, - {file = "langchain_core-0.0.7.tar.gz", hash = "sha256:2310df8b783194ec2dfe01c2864bd8b3ccb4adecb02b17cf1d63cc773c252b4a"}, -] - -[package.dependencies] -jsonpatch = ">=1.33,<2.0" -langsmith = ">=0.0.63,<0.1.0" -pydantic = ">=1,<3" -tenacity = ">=8.1.0,<9.0.0" - -[[package]] -name = "langsmith" -version = "0.0.67" -description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -optional = false -python-versions = ">=3.8.1,<4.0" -files = [ - {file = "langsmith-0.0.67-py3-none-any.whl", hash = "sha256:66a257b97dabd43a7e62af271b2ddb7566167ce4e446fd7b7760e97d6ce84a5e"}, - {file = "langsmith-0.0.67.tar.gz", hash = "sha256:cef00bac2e7455a5943f3afaea91c032db1a1f2adb83003159a71e884fb5a9a2"}, -] - -[package.dependencies] -pydantic = ">=1,<3" -requests = ">=2,<3" - [[package]] name = "llama-index" -version = "0.8.62" +version = "0.9.10" description = "Interface between LLMs and your data" optional = false python-versions = ">=3.8.1,<3.12" files = [ - {file = "llama_index-0.8.62-py3-none-any.whl", hash = "sha256:5ea95e1a1ec0f759e29093c92cdfd3f1c780d3c638a306b86aa22993ab15ce80"}, - {file = "llama_index-0.8.62.tar.gz", hash = "sha256:c0db90f49ca8a11777b14e2a72921bef1edbf21ac5564651911999a9913f14ae"}, + {file = "llama_index-0.9.10-py3-none-any.whl", hash = "sha256:475678eea433b2e209a4faee768c67f1e7a58ba3ffd441a82c3387585e79b24e"}, + {file = "llama_index-0.9.10.tar.gz", hash = "sha256:d42f035caa206f3110c5c8e908f3c6e2dd3a1bd59c8ba5afe5466d338d230109"}, ] [package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" aiostream = ">=0.5.2,<0.6.0" -dataclasses-json = ">=0.5.7,<0.6.0" +beautifulsoup4 = ">=4.12.2,<5.0.0" +dataclasses-json = "*" deprecated = ">=1.2.9.3" fsspec = ">=2023.5.0" -langchain = ">=0.0.303" +httpx = "*" nest-asyncio = ">=1.5.8,<2.0.0" nltk = ">=3.8.1,<4.0.0" numpy = "*" -openai = "<1" +openai = ">=1.1.0" pandas = "*" +requests = ">=2.31.0" SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} tenacity = ">=8.2.0,<9.0.0" tiktoken = ">=0.3.3" typing-extensions = ">=4.5.0" typing-inspect = ">=0.8.0" -urllib3 = "<2" [package.extras] +langchain = ["langchain (>=0.0.303)"] local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.34.0,<5.0.0)"] postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg-binary (>=3.1.12,<4.0.0)"] -query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn (<1.3.0)", "spacy (>=3.7.1,<4.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] [[package]] name = "markdown-it-py" @@ -1588,25 +1522,26 @@ files = [ [[package]] name = "openai" -version = "0.28.1" -description = "Python client library for the OpenAI API" +version = "1.3.7" +description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-0.28.1-py3-none-any.whl", hash = "sha256:d18690f9e3d31eedb66b57b88c2165d760b24ea0a01f150dd3f068155088ce68"}, - {file = "openai-0.28.1.tar.gz", hash = "sha256:4be1dad329a65b4ce1a660fe6d5431b438f429b5855c883435f0f7fcb6d2dcc8"}, + {file = "openai-1.3.7-py3-none-any.whl", hash = "sha256:e5c51367a910297e4d1cd33d2298fb87d7edf681edbe012873925ac16f95bee0"}, + {file = "openai-1.3.7.tar.gz", hash = "sha256:18074a0f51f9b49d1ae268c7abc36f7f33212a0c0d08ce11b7053ab2d17798de"}, ] [package.dependencies] -aiohttp = "*" -requests = ">=2.20" -tqdm = "*" +anyio = ">=3.5.0,<4" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.5,<5" [package.extras] -datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] -embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] -wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "overrides" @@ -2341,7 +2276,7 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, @@ -2768,6 +2703,17 @@ files = [ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + [[package]] name = "sqlalchemy" version = "2.0.23" @@ -3718,4 +3664,4 @@ postgres = ["pg8000", "pgvector", "psycopg", "psycopg-binary", "psycopg2-binary" [metadata] lock-version = "2.0" python-versions = "<3.12,>=3.9" -content-hash = "d4a3af7c9778a2ce0e66bd2f73b8d5c69fc1de473551b0ed52594678baa44d12" +content-hash = "72d6f7fb6e619b61c886bada56262d08940b1bc110fd2b1397efc2c2d5a82a68" diff --git a/pyproject.toml b/pyproject.toml index 99016559..75a62d42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ pytz = "^2023.3.post1" tqdm = "^4.66.1" black = { version = "^23.10.1", optional = true } pytest = { version = "^7.4.3", optional = true } -llama-index = "^0.8.53.post3" +llama-index = "0.9.10" setuptools = "^68.2.2" datasets = { version = "^2.14.6", optional = true} prettytable = "^3.9.0"