From 0da0cd761bade7712559d72cfcb9b1e076bc7bee Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 30 Nov 2024 16:35:04 +0100
Subject: [PATCH 01/23] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index b9ea686b4..ac03bfd56 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ We build for developers who need a reliable, production-ready data layer for AI
## What is cognee?
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
-Try it in a Google Colab notebook or have a look at our documentation
+Try it in a Google Colab notebook or have a look at our documentation
If you have questions, join our Discord community
From d855e2ee88e616ea65e5a435410d4c6ac459a736 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 30 Nov 2024 16:36:34 +0100
Subject: [PATCH 02/23] Update README.md
---
README.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index ac03bfd56..1cec3f051 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ import cognee
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
```
You can also set the variables by creating .env file, here is our template.
-To use different LLM providers, for more info check out our documentation
+To use different LLM providers, for more info check out our documentation
If you are using Network, create an account on Graphistry to visualize results:
```
@@ -282,7 +282,7 @@ Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/ma
### Install Server
-Please see the [cognee Quick Start Guide](https://topoteretes.github.io/cognee/quickstart/) for important configuration information.
+Please see the [cognee Quick Start Guide](https://docs.cognee.ai/quickstart/) for important configuration information.
```bash
docker compose up
@@ -291,7 +291,7 @@ docker compose up
### Install SDK
-Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions.
+Please see the cognee [Development Guide](https://docs.cognee.ai/quickstart/) for important beta information and usage instructions.
```bash
pip install cognee
From 491dc4967925327d57f5d1d230072df4c46c016e Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 14:00:17 +0100
Subject: [PATCH 03/23] refactor: Change proxy url to custom domain
Added custom domain for proxy use
Refactor COG-741
---
cognee/shared/utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py
index a1792a2ed..315e234f1 100644
--- a/cognee/shared/utils.py
+++ b/cognee/shared/utils.py
@@ -17,7 +17,7 @@ from uuid import uuid4
import pathlib
# Analytics Proxy Url, currently hosted by Vercel
-vercel_url = "https://proxyanalytics.vercel.app"
+proxy_url = "https://test.prometh.ai"
def get_anonymous_id():
"""Creates or reads a anonymous user id"""
@@ -57,7 +57,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
},
}
- response = requests.post(vercel_url, json=payload)
+ response = requests.post(proxy_url, json=payload)
if response.status_code != 200:
print(f"Error sending telemetry through proxy: {response.status_code}")
From 10dc6b152474eb5623145670857bc62f21922aec Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 14:38:41 +0100
Subject: [PATCH 04/23] chore: Add optional dependencies
Change intended optional dependencies to really be optional
Chore COG-595
---
poetry.lock | 32 ++++++++++++++++----------------
pyproject.toml | 10 +++++-----
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 96f9aec27..08098caad 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -406,7 +406,7 @@ files = [
name = "asyncpg"
version = "0.30.0"
description = "An asyncio PostgreSQL driver"
-optional = false
+optional = true
python-versions = ">=3.8.0"
files = [
{file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
@@ -491,7 +491,7 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
name = "authlib"
version = "1.3.2"
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "Authlib-1.3.2-py2.py3-none-any.whl", hash = "sha256:ede026a95e9f5cdc2d4364a52103f5405e75aa156357e831ef2bfd0bc5094dfc"},
@@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5"
name = "grpcio"
version = "1.67.1"
description = "HTTP/2-based RPC framework"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
@@ -2108,7 +2108,7 @@ protobuf = ["grpcio-tools (>=1.67.1)"]
name = "grpcio-health-checking"
version = "1.67.1"
description = "Standard Health Checking Service for gRPC"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "grpcio_health_checking-1.67.1-py3-none-any.whl", hash = "sha256:93753da5062152660aef2286c9b261e07dd87124a65e4dc9fbd47d1ce966b39d"},
@@ -2123,7 +2123,7 @@ protobuf = ">=5.26.1,<6.0dev"
name = "grpcio-tools"
version = "1.67.1"
description = "Protobuf code generator for gRPC"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"},
@@ -2223,7 +2223,7 @@ files = [
name = "h2"
version = "4.1.0"
description = "HTTP/2 State-Machine based protocol implementation"
-optional = false
+optional = true
python-versions = ">=3.6.1"
files = [
{file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
@@ -2254,7 +2254,7 @@ test = ["eth-utils (>=2.0.0)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0
name = "hpack"
version = "4.0.0"
description = "Pure-Python HPACK header compression"
-optional = false
+optional = true
python-versions = ">=3.6.1"
files = [
{file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
@@ -2369,7 +2369,7 @@ tests = ["freezegun", "pytest", "pytest-cov"]
name = "hyperframe"
version = "6.0.1"
description = "HTTP/2 framing layer for Python"
-optional = false
+optional = true
python-versions = ">=3.6.1"
files = [
{file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
@@ -4067,7 +4067,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"]
name = "neo4j"
version = "5.26.0"
description = "Neo4j Bolt driver for Python"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "neo4j-5.26.0-py3-none-any.whl", hash = "sha256:511a6a9468ca89b521bf686f885a2070acc462b1d09821d43710bd477acdf11e"},
@@ -4613,7 +4613,7 @@ ptyprocess = ">=0.5"
name = "pgvector"
version = "0.3.6"
description = "pgvector support for Python"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "pgvector-0.3.6-py3-none-any.whl", hash = "sha256:f6c269b3c110ccb7496bac87202148ed18f34b390a0189c783e351062400a75a"},
@@ -4761,7 +4761,7 @@ files = [
name = "portalocker"
version = "2.10.1"
description = "Wraps the portalocker recipe for easy usage"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"},
@@ -4938,7 +4938,7 @@ files = [
name = "protobuf"
version = "5.28.3"
description = ""
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"},
@@ -5739,7 +5739,7 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""}
name = "qdrant-client"
version = "1.12.1"
description = "Client library for the Qdrant vector search engine"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"},
@@ -7144,7 +7144,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
name = "validators"
version = "0.33.0"
description = "Python Data Validation for Humans™"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "validators-0.33.0-py3-none-any.whl", hash = "sha256:134b586a98894f8139865953899fc2daeb3d0c35569552c5518f089ae43ed075"},
@@ -7211,7 +7211,7 @@ files = [
name = "weaviate-client"
version = "4.6.7"
description = "A python native Weaviate client"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "weaviate_client-4.6.7-py3-none-any.whl", hash = "sha256:8793de35264cab33a84fe8cb8c422a257fe4d8334657aaddd8ead853da3fb34a"},
@@ -7641,4 +7641,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "474ae44ef721bf9b2d34d1cd139cddf42542ef9167895960784b6e88214dd1e6"
+content-hash = "bf097b51f6147b82c63f67c34885981c2166b50755b0f3e690ab1c1e55fa56ee"
diff --git a/pyproject.toml b/pyproject.toml
index 92b70db63..b6fc10815 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,13 +43,13 @@ filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.3.0"}
aiofiles = "^23.2.1"
-qdrant-client = "^1.9.0"
+qdrant-client = {version = "^1.9.0", optional = true}
graphistry = "^0.33.5"
tenacity = "^8.4.1"
-weaviate-client = "4.6.7"
+weaviate-client = {version = "4.6.7", optional = true}
scikit-learn = "^1.5.0"
pypdf = "^4.1.0"
-neo4j = "^5.20.0"
+neo4j = {version = "^5.20.0", optional = true}
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
tiktoken = "0.7.0"
@@ -66,8 +66,8 @@ anthropic = "^0.26.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = {version = "*", extras = ["sqlalchemy"]}
alembic = "^1.13.3"
-asyncpg = "0.30.0"
-pgvector = "^0.3.5"
+asyncpg = {version = "0.30.0", optional = true}
+pgvector = {version = "^0.3.5", optional = true}
psycopg2 = {version = "^2.9.10", optional = true}
llama-index-core = {version = "^0.11.22", optional = true}
From f41228aa51b0c0a254a0fcc0465a115b6e2fe538 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 14:52:24 +0100
Subject: [PATCH 05/23] fix: Resolve import of optional package
Resolve issue with forced import of optional pgvector package.
Fix COG-595
---
.../databases/vector/pgvector/PGVectorAdapter.py | 4 +++-
.../databases/vector/pgvector/create_db_and_tables.py | 1 -
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
index fd0fd493c..74a32511d 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -1,6 +1,5 @@
import asyncio
from uuid import UUID
-from pgvector.sqlalchemy import Vector
from typing import List, Optional, get_type_hints
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import JSON, Column, Table, select, delete
@@ -68,6 +67,8 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
vector_size = self.embedding_engine.get_vector_size()
if not await self.has_collection(collection_name):
+
+ from pgvector.sqlalchemy import Vector
class PGVectorDataPoint(Base):
__tablename__ = collection_name
__table_args__ = {"extend_existing": True}
@@ -105,6 +106,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
vector_size = self.embedding_engine.get_vector_size()
+ from pgvector.sqlalchemy import Vector
class PGVectorDataPoint(Base):
__tablename__ = collection_name
__table_args__ = {"extend_existing": True}
diff --git a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
index f40299939..1900cfe88 100644
--- a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
+++ b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
@@ -1,4 +1,3 @@
-from ...relational.ModelBase import Base
from ..get_vector_engine import get_vector_engine, get_vectordb_config
from sqlalchemy import text
From 13b79320b62cddeef6217665883cb8757e510132 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 14:54:20 +0100
Subject: [PATCH 06/23] fix: Resolve issue with gh actions not installing
optional packages
Add install of optional databases packages for database gh actions
Fix COG-595
---
.github/workflows/test_neo4j.yml | 2 +-
.github/workflows/test_qdrant.yml | 2 +-
.github/workflows/test_weaviate.yml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test_neo4j.yml b/.github/workflows/test_neo4j.yml
index 0b47a55fc..69aca2473 100644
--- a/.github/workflows/test_neo4j.yml
+++ b/.github/workflows/test_neo4j.yml
@@ -46,7 +46,7 @@ jobs:
installer-parallel: true
- name: Install dependencies
- run: poetry install --no-interaction
+ run: poetry install -E neo4j --no-interaction
- name: Run default Neo4j
env:
diff --git a/.github/workflows/test_qdrant.yml b/.github/workflows/test_qdrant.yml
index a6347bd0d..4291801f3 100644
--- a/.github/workflows/test_qdrant.yml
+++ b/.github/workflows/test_qdrant.yml
@@ -47,7 +47,7 @@ jobs:
installer-parallel: true
- name: Install dependencies
- run: poetry install --no-interaction
+ run: poetry install -E qdrant --no-interaction
- name: Run default Qdrant
env:
diff --git a/.github/workflows/test_weaviate.yml b/.github/workflows/test_weaviate.yml
index 490f9075a..67968e6a6 100644
--- a/.github/workflows/test_weaviate.yml
+++ b/.github/workflows/test_weaviate.yml
@@ -47,7 +47,7 @@ jobs:
installer-parallel: true
- name: Install dependencies
- run: poetry install --no-interaction
+ run: poetry install -E weaviate --no-interaction
- name: Run default Weaviate
env:
From 63e687978ebd0300613035dd8094a3ded8686279 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 15:30:30 +0100
Subject: [PATCH 07/23] chore: Make falkordb and posthog optional
 dependencies
Switched falkordb and posthog to be optional dependencies
Chore COG-595
---
poetry.lock | 12 +++++++-----
pyproject.toml | 6 ++++--
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 08098caad..e0848cbab 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1485,7 +1485,7 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth
name = "falkordb"
version = "1.0.9"
description = "Python client for interacting with FalkorDB database"
-optional = false
+optional = true
python-versions = "<4.0,>=3.8"
files = [
{file = "falkordb-1.0.9.tar.gz", hash = "sha256:177008e63c7e4d9ebbdfeb8cad24b0e49175bb0f6e96cac9b4ffb641c0eff0f1"},
@@ -3783,7 +3783,7 @@ mkdocstrings = ">=0.26"
name = "monotonic"
version = "1.6"
description = "An implementation of time.monotonic() for Python 2 & < 3.3"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"},
@@ -4780,7 +4780,7 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p
name = "posthog"
version = "3.7.0"
description = "Integrate PostHog into any python application."
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "posthog-3.7.0-py2.py3-none-any.whl", hash = "sha256:3555161c3a9557b5666f96d8e1f17f410ea0f07db56e399e336a1656d4e5c722"},
@@ -5763,7 +5763,7 @@ fastembed-gpu = ["fastembed-gpu (==0.3.6)"]
name = "redis"
version = "5.2.0"
description = "Python client for Redis database and key-value store"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "redis-5.2.0-py3-none-any.whl", hash = "sha256:ae174f2bb3b1bf2b09d54bf3e51fbc1469cf6c10aa03e21141f51969801a7897"},
@@ -7630,15 +7630,17 @@ type = ["pytest-mypy"]
[extras]
cli = []
+falkordb = ["falkordb"]
filesystem = ["botocore"]
llama-index = ["llama-index-core"]
neo4j = ["neo4j"]
notebook = []
postgres = ["asyncpg", "pgvector", "psycopg2"]
+posthog = ["posthog"]
qdrant = ["qdrant-client"]
weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "bf097b51f6147b82c63f67c34885981c2166b50755b0f3e690ab1c1e55fa56ee"
+content-hash = "12148f1911ef4d74f01e6cfd7ee071ea8a9dfe435c55ab6124ac811669450e67"
diff --git a/pyproject.toml b/pyproject.toml
index b6fc10815..9bebc33b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ typing_extensions = "4.12.2"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"
-falkordb = "1.0.9"
+falkordb = {version = "1.0.9", optional = true}
boto3 = "^1.26.125"
botocore="^1.35.54"
gunicorn = "^20.1.0"
@@ -56,7 +56,7 @@ tiktoken = "0.7.0"
langchain_text_splitters = "0.3.2"
langsmith = "0.1.139"
langdetect = "1.0.9"
-posthog = "^3.5.0"
+posthog = {version = "^3.5.0", optional = true}
lancedb = "0.15.0"
litellm = "1.49.1"
groq = "0.8.0"
@@ -80,6 +80,8 @@ neo4j = ["neo4j"]
postgres = ["psycopg2", "pgvector", "asyncpg"]
notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
llama-index = ["llama-index-core"]
+posthog = ["posthog"]
+falkordb = ["falkordb"]
[tool.poetry.group.dev.dependencies]
From 836e3d29e178e8afe3fa39c5c52335eab6f93b80 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 16:24:12 +0100
Subject: [PATCH 08/23] chore: Update README.md
Update README.md with database optional dependencies
Chore COG-595
---
README.md | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/README.md b/README.md
index 1cec3f051..d0b7ba778 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,24 @@ pip install cognee
pip install 'cognee[postgres]'
```
+### With pip with Weaviate support
+
+```bash
+pip install 'cognee[weaviate]'
+```
+
+### With pip with Qdrant support
+
+```bash
+pip install 'cognee[qdrant]'
+```
+
+### With pip with Neo4j support
+
+```bash
+pip install 'cognee[neo4j]'
+```
+
### With poetry
```bash
@@ -44,6 +62,24 @@ poetry add cognee
poetry add cognee -E postgres
```
+### With poetry with Weaviate support
+
+```bash
+poetry add cognee -E weaviate
+```
+
+### With poetry with Qdrant support
+
+```bash
+poetry add cognee -E qdrant
+```
+
+### With poetry with Neo4j support
+
+```bash
+poetry add cognee -E neo4j
+```
+
## 💻 Basic Usage
From ad56ff9c1aac775a5e3fb6d9329bfc38c11286f9 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 16:47:26 +0100
Subject: [PATCH 09/23] chore: Add langchain as optional dependency
Added langchain as optional dependency
Chore COG-595
---
poetry.lock | 136 ++++++++++++++++++++++++-------------------------
pyproject.toml | 10 ++--
2 files changed, 73 insertions(+), 73 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index e0848cbab..7584a106d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -247,7 +247,7 @@ trio = ["trio (>=0.26.1)"]
name = "appnope"
version = "0.1.4"
description = "Disable App Nap on macOS >= 10.9"
-optional = false
+optional = true
python-versions = ">=3.6"
files = [
{file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"},
@@ -315,7 +315,7 @@ tests = ["pytest"]
name = "arrow"
version = "1.3.0"
description = "Better dates & times for Python"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80"},
@@ -348,7 +348,7 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
name = "asttokens"
version = "2.4.1"
description = "Annotate AST trees with source code positions"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"},
@@ -381,7 +381,7 @@ wheel = ">=0.23.0,<1.0"
name = "async-lru"
version = "2.0.4"
description = "Simple LRU cache for asyncio"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"},
@@ -570,7 +570,7 @@ typecheck = ["mypy"]
name = "beautifulsoup4"
version = "4.12.3"
description = "Screen-scraping library"
-optional = false
+optional = true
python-versions = ">=3.6.0"
files = [
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
@@ -591,7 +591,7 @@ lxml = ["lxml"]
name = "bleach"
version = "6.2.0"
description = "An easy safelist-based HTML-sanitizing tool."
-optional = false
+optional = true
python-versions = ">=3.9"
files = [
{file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"},
@@ -889,7 +889,7 @@ files = [
name = "comm"
version = "0.2.2"
description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"},
@@ -1226,7 +1226,7 @@ files = [
name = "decorator"
version = "5.1.1"
description = "Decorators for Humans"
-optional = false
+optional = true
python-versions = ">=3.5"
files = [
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
@@ -1237,7 +1237,7 @@ files = [
name = "defusedxml"
version = "0.7.1"
description = "XML bomb protection for Python stdlib modules"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
@@ -1471,7 +1471,7 @@ test = ["pytest (>=6)"]
name = "executing"
version = "2.1.0"
description = "Get the currently executing AST node of a frame, and other information"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"},
@@ -1558,7 +1558,7 @@ sqlalchemy = {version = ">=2.0.0,<2.1.0", extras = ["asyncio"]}
name = "fastjsonschema"
version = "2.20.0"
description = "Fastest Python implementation of JSON schema"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"},
@@ -1670,7 +1670,7 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
name = "fqdn"
version = "1.5.1"
description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4"
files = [
{file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"},
@@ -2483,7 +2483,7 @@ vertexai = ["google-cloud-aiplatform (>=1.53.0,<2.0.0)", "jsonref (>=1.1.0,<2.0.
name = "ipykernel"
version = "6.29.5"
description = "IPython Kernel for Jupyter"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"},
@@ -2516,7 +2516,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
name = "ipython"
version = "8.18.1"
description = "IPython: Productive Interactive Computing"
-optional = false
+optional = true
python-versions = ">=3.9"
files = [
{file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"},
@@ -2553,7 +2553,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pa
name = "isoduration"
version = "20.11.0"
description = "Operations with ISO 8601 durations"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042"},
@@ -2581,7 +2581,7 @@ colors = ["colorama (>=0.4.6)"]
name = "jedi"
version = "0.19.2"
description = "An autocompletion tool for Python that can be used for text editors."
-optional = false
+optional = true
python-versions = ">=3.6"
files = [
{file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"},
@@ -2719,7 +2719,7 @@ files = [
name = "json5"
version = "0.9.27"
description = "A Python implementation of the JSON5 data format."
-optional = false
+optional = true
python-versions = ">=3.8.0"
files = [
{file = "json5-0.9.27-py3-none-any.whl", hash = "sha256:17b43d78d3a6daeca4d7030e9bf22092dba29b1282cc2d0cfa56f6febee8dc93"},
@@ -2733,7 +2733,7 @@ dev = ["build (==1.2.1)", "coverage (==7.5.3)", "mypy (==1.10.0)", "pip (==24.1)
name = "jsonpatch"
version = "1.33"
description = "Apply JSON-Patches (RFC 6902)"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
{file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"},
@@ -2760,7 +2760,7 @@ ply = "*"
name = "jsonpointer"
version = "3.0.0"
description = "Identify specific nodes in a JSON document (RFC 6901)"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"},
@@ -2814,7 +2814,7 @@ referencing = ">=0.31.0"
name = "jupyter-client"
version = "8.6.3"
description = "Jupyter protocol implementation and client libraries"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"},
@@ -2837,7 +2837,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt
name = "jupyter-core"
version = "5.7.2"
description = "Jupyter core package. A base package on which Jupyter projects rely."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"},
@@ -2857,7 +2857,7 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"
name = "jupyter-events"
version = "0.10.0"
description = "Jupyter Event System library"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter_events-0.10.0-py3-none-any.whl", hash = "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960"},
@@ -2882,7 +2882,7 @@ test = ["click", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "p
name = "jupyter-lsp"
version = "2.2.5"
description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001"},
@@ -2897,7 +2897,7 @@ jupyter-server = ">=1.1.2"
name = "jupyter-server"
version = "2.14.2"
description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter_server-2.14.2-py3-none-any.whl", hash = "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd"},
@@ -2933,7 +2933,7 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0,<9)", "pytest-console
name = "jupyter-server-terminals"
version = "0.5.3"
description = "A Jupyter Server Extension Providing Terminals."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa"},
@@ -2952,7 +2952,7 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (>
name = "jupyterlab"
version = "4.2.5"
description = "JupyterLab computational environment"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyterlab-4.2.5-py3-none-any.whl", hash = "sha256:73b6e0775d41a9fee7ee756c80f58a6bed4040869ccc21411dc559818874d321"},
@@ -2987,7 +2987,7 @@ upgrade-extension = ["copier (>=9,<10)", "jinja2-time (<0.3)", "pydantic (<3.0)"
name = "jupyterlab-pygments"
version = "0.3.0"
description = "Pygments theme using JupyterLab CSS variables"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"},
@@ -2998,7 +2998,7 @@ files = [
name = "jupyterlab-server"
version = "2.27.3"
description = "A set of server components for JupyterLab and JupyterLab like applications."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4"},
@@ -3181,7 +3181,7 @@ tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19,<=1.3.0
name = "langchain-core"
version = "0.3.15"
description = "Building applications with LLMs through composability"
-optional = false
+optional = true
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_core-0.3.15-py3-none-any.whl", hash = "sha256:3d4ca6dbb8ed396a6ee061063832a2451b0ce8c345570f7b086ffa7288e4fa29"},
@@ -3201,7 +3201,7 @@ typing-extensions = ">=4.7"
name = "langchain-text-splitters"
version = "0.3.2"
description = "LangChain text splitting utilities"
-optional = false
+optional = true
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_text_splitters-0.3.2-py3-none-any.whl", hash = "sha256:0db28c53f41d1bc024cdb3b1646741f6d46d5371e90f31e7e7c9fbe75d01c726"},
@@ -3254,7 +3254,7 @@ openai = ["openai (>=0.27.8)"]
name = "langsmith"
version = "0.1.139"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
-optional = false
+optional = true
python-versions = "<4.0,>=3.8.1"
files = [
{file = "langsmith-0.1.139-py3-none-any.whl", hash = "sha256:2a4a541bfbd0a9727255df28a60048c85bc8c4c6a276975923785c3fd82dc879"},
@@ -3559,7 +3559,7 @@ dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setupto
name = "matplotlib-inline"
version = "0.1.7"
description = "Inline Matplotlib backend for Jupyter"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"},
@@ -3606,7 +3606,7 @@ files = [
name = "mistune"
version = "3.0.2"
description = "A sane and fast Markdown parser with useful plugins and renderers"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"},
@@ -3986,7 +3986,7 @@ files = [
name = "nbclient"
version = "0.10.0"
description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor."
-optional = false
+optional = true
python-versions = ">=3.8.0"
files = [
{file = "nbclient-0.10.0-py3-none-any.whl", hash = "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f"},
@@ -4008,7 +4008,7 @@ test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=
name = "nbconvert"
version = "7.16.4"
description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"},
@@ -4046,7 +4046,7 @@ webpdf = ["playwright"]
name = "nbformat"
version = "5.10.4"
description = "The Jupyter Notebook format"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"},
@@ -4140,7 +4140,7 @@ twitter = ["twython"]
name = "notebook"
version = "7.2.2"
description = "Jupyter Notebook - A web-based notebook environment for interactive computing"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"},
@@ -4163,7 +4163,7 @@ test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4
name = "notebook-shim"
version = "0.2.4"
description = "A shim layer for notebook traits and config"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef"},
@@ -4447,7 +4447,7 @@ xml = ["lxml (>=4.6.3)"]
name = "pandocfilters"
version = "1.5.1"
description = "Utilities for writing pandoc filters in python"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
{file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"},
@@ -4458,7 +4458,7 @@ files = [
name = "parso"
version = "0.8.4"
description = "A Python Parser"
-optional = false
+optional = true
python-versions = ">=3.6"
files = [
{file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"},
@@ -4599,7 +4599,7 @@ test = ["time-machine (>=2.6.0)"]
name = "pexpect"
version = "4.9.0"
description = "Pexpect allows easy control of interactive console applications."
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"},
@@ -4803,7 +4803,7 @@ test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)"
name = "prometheus-client"
version = "0.21.0"
description = "Python client for the Prometheus monitoring system."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "prometheus_client-0.21.0-py3-none-any.whl", hash = "sha256:4fa6b4dd0ac16d58bb587c04b1caae65b8c5043e85f778f42f5f632f6af2e166"},
@@ -4817,7 +4817,7 @@ twisted = ["twisted"]
name = "prompt-toolkit"
version = "3.0.48"
description = "Library for building powerful interactive command lines in Python"
-optional = false
+optional = true
python-versions = ">=3.7.0"
files = [
{file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"},
@@ -4958,7 +4958,7 @@ files = [
name = "psutil"
version = "6.1.0"
description = "Cross-platform lib for process and system monitoring in Python."
-optional = false
+optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
{file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"},
@@ -5006,7 +5006,7 @@ files = [
name = "ptyprocess"
version = "0.7.0"
description = "Run a subprocess in a pseudo terminal"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
@@ -5017,7 +5017,7 @@ files = [
name = "pure-eval"
version = "0.2.3"
description = "Safely evaluate AST nodes without side effects"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"},
@@ -5467,7 +5467,7 @@ cli = ["click (>=5.0)"]
name = "python-json-logger"
version = "2.0.7"
description = "A python library adding a json log formatter"
-optional = false
+optional = true
python-versions = ">=3.6"
files = [
{file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"},
@@ -5500,7 +5500,7 @@ files = [
name = "pywin32"
version = "308"
description = "Python for Window Extensions"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
@@ -5527,7 +5527,7 @@ files = [
name = "pywinpty"
version = "2.0.14"
description = "Pseudo terminal support for Windows from Python."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f"},
@@ -5618,7 +5618,7 @@ pyyaml = "*"
name = "pyzmq"
version = "26.2.0"
description = "Python bindings for 0MQ"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"},
@@ -5938,7 +5938,7 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
name = "requests-toolbelt"
version = "1.0.0"
description = "A utility belt for advanced users of python-requests"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
{file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
@@ -5967,7 +5967,7 @@ types-setuptools = ">=69.1.0"
name = "rfc3339-validator"
version = "0.1.4"
description = "A pure python RFC3339 validator"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"},
@@ -5981,7 +5981,7 @@ six = "*"
name = "rfc3986-validator"
version = "0.1.1"
description = "Pure python rfc3986 validator"
-optional = false
+optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9"},
@@ -6256,7 +6256,7 @@ files = [
name = "send2trash"
version = "1.8.3"
description = "Send file to trash natively under Mac OS X, Windows and Linux"
-optional = false
+optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
{file = "Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9"},
@@ -6510,7 +6510,7 @@ files = [
name = "soupsieve"
version = "2.6"
description = "A modern CSS selector implementation for Beautiful Soup."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
@@ -6619,7 +6619,7 @@ files = [
name = "stack-data"
version = "0.6.3"
description = "Extract data from python stack frames and tracebacks for informative displays"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
@@ -6671,7 +6671,7 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"]
name = "terminado"
version = "0.18.1"
description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0"},
@@ -6755,7 +6755,7 @@ blobfile = ["blobfile (>=2)"]
name = "tinycss2"
version = "1.4.0"
description = "A tiny CSS parser"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"},
@@ -6924,7 +6924,7 @@ files = [
name = "tornado"
version = "6.4.1"
description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"},
@@ -6965,7 +6965,7 @@ telegram = ["requests"]
name = "traitlets"
version = "5.14.3"
description = "Traitlets Python configuration system"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"},
@@ -7020,7 +7020,7 @@ typing-extensions = ">=3.7.4.3"
name = "types-python-dateutil"
version = "2.9.0.20241003"
description = "Typing stubs for python-dateutil"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"},
@@ -7079,7 +7079,7 @@ files = [
name = "uri-template"
version = "1.3.0"
description = "RFC 6570 URI Template Processor"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7"},
@@ -7200,7 +7200,7 @@ watchmedo = ["PyYAML (>=3.10)"]
name = "wcwidth"
version = "0.2.13"
description = "Measures the displayed width of unicode strings in a terminal"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
@@ -7232,7 +7232,7 @@ validators = "0.33.0"
name = "webcolors"
version = "24.11.1"
description = "A library for working with the color formats defined by HTML and CSS."
-optional = false
+optional = true
python-versions = ">=3.9"
files = [
{file = "webcolors-24.11.1-py3-none-any.whl", hash = "sha256:515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9"},
@@ -7243,7 +7243,7 @@ files = [
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
@@ -7254,7 +7254,7 @@ files = [
name = "websocket-client"
version = "1.8.0"
description = "WebSocket client for Python with low level API options"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"},
@@ -7629,9 +7629,9 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
type = ["pytest-mypy"]
[extras]
-cli = []
falkordb = ["falkordb"]
filesystem = ["botocore"]
+langchain = ["langchain_text_splitters", "langsmith"]
llama-index = ["llama-index-core"]
neo4j = ["neo4j"]
notebook = []
@@ -7643,4 +7643,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "12148f1911ef4d74f01e6cfd7ee071ea8a9dfe435c55ab6124ac811669450e67"
+content-hash = "c707b7e45a49a3c8c00b71292ca42898f2c5905f1e62bc7c9bc48d7423e9d990"
diff --git a/pyproject.toml b/pyproject.toml
index 9bebc33b6..019415ee1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,8 +53,8 @@ neo4j = {version = "^5.20.0", optional = true}
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
tiktoken = "0.7.0"
-langchain_text_splitters = "0.3.2"
-langsmith = "0.1.139"
+langchain_text_splitters = {version = "0.3.2", optional = true}
+langsmith = {version = "0.1.139", optional = true}
langdetect = "1.0.9"
posthog = {version = "^3.5.0", optional = true}
lancedb = "0.15.0"
@@ -73,12 +73,12 @@ llama-index-core = {version = "^0.11.22", optional = true}
[tool.poetry.extras]
filesystem = ["s3fs", "botocore"]
-cli = ["pipdeptree", "cron-descriptor"]
weaviate = ["weaviate-client"]
qdrant = ["qdrant-client"]
neo4j = ["neo4j"]
postgres = ["psycopg2", "pgvector", "asyncpg"]
-notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
+notebook = ["notebook", "ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
+langchain = ["langsmith", "langchain_text_splitters"]
llama-index = ["llama-index-core"]
posthog = ["posthog"]
falkordb = ["falkordb"]
@@ -89,7 +89,7 @@ pytest = "^7.4.0"
pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"
mypy = "^1.7.1"
-notebook = "^7.1.1"
+notebook = {version = "^7.1.1", optional = true}
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
From 1a5f0fe10d966480b8b0d983ab87c7459ab78553 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 19:07:17 +0100
Subject: [PATCH 10/23] chore: Update readme with more description about extras
Update the README with a clearer description of the available installation extras
Chore COG-595
---
README.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index d0b7ba778..2b29f1448 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,9 @@ If you have questions, join our Discord
## 📦 Installation
+You can install Cognee using either **pip** or **poetry**.
+Support for various databases and vector stores is available through extras.
+
### With pip
```bash
From dcb320da525eee644e675fa044f46ffdb335f0c1 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 2 Dec 2024 19:35:50 +0100
Subject: [PATCH 11/23] chore: Add groq and langfuse as optional dependencies
Added groq and langfuse as optional dependencies
Chore COG-595
---
poetry.lock | 12 +++++++-----
pyproject.toml | 6 ++++--
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 7584a106d..7d09c340d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -519,7 +519,7 @@ dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
name = "backoff"
version = "2.2.1"
description = "Function decoration for backoff and retry"
-optional = false
+optional = true
python-versions = ">=3.7,<4.0"
files = [
{file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
@@ -2022,7 +2022,7 @@ colorama = ">=0.4"
name = "groq"
version = "0.8.0"
description = "The official Python library for the groq API"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "groq-0.8.0-py3-none-any.whl", hash = "sha256:f5e4e892d45001241a930db451e633ca1f0007e3f749deaa5d7360062fcd61e3"},
@@ -3229,7 +3229,7 @@ six = "*"
name = "langfuse"
version = "2.53.9"
description = "A client library for accessing langfuse"
-optional = false
+optional = true
python-versions = "<4.0,>=3.8.1"
files = [
{file = "langfuse-2.53.9-py3-none-any.whl", hash = "sha256:04363bc323f7513621c88a997003f7b906ae8f5d096bd54221cfcb6bf7a6f16a"},
@@ -7306,7 +7306,7 @@ files = [
name = "wrapt"
version = "1.16.0"
description = "Module for decorators, wrappers and monkey patching."
-optional = false
+optional = true
python-versions = ">=3.6"
files = [
{file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
@@ -7631,7 +7631,9 @@ type = ["pytest-mypy"]
[extras]
falkordb = ["falkordb"]
filesystem = ["botocore"]
+groq = ["groq"]
langchain = ["langchain_text_splitters", "langsmith"]
+langfuse = ["langfuse"]
llama-index = ["llama-index-core"]
neo4j = ["neo4j"]
notebook = []
@@ -7643,4 +7645,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "c707b7e45a49a3c8c00b71292ca42898f2c5905f1e62bc7c9bc48d7423e9d990"
+content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376"
diff --git a/pyproject.toml b/pyproject.toml
index 019415ee1..6fac2597c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,8 +59,8 @@ langdetect = "1.0.9"
posthog = {version = "^3.5.0", optional = true}
lancedb = "0.15.0"
litellm = "1.49.1"
-groq = "0.8.0"
-langfuse = "^2.32.0"
+groq = {version = "0.8.0", optional = true}
+langfuse = {version = "^2.32.0", optional = true}
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
@@ -82,6 +82,8 @@ langchain = ["langsmith", "langchain_text_splitters"]
llama-index = ["llama-index-core"]
posthog = ["posthog"]
falkordb = ["falkordb"]
+groq = ["groq"]
+langfuse = ["langfuse"]
[tool.poetry.group.dev.dependencies]
From 6841c83566dac3354d2fa127aa3c6fc9713e0586 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Mon, 2 Dec 2024 20:18:55 +0100
Subject: [PATCH 12/23] fix: remove duplicated edges in cognify and restore the
 graph utility methods to an older version
---
.../graph/utils/get_graph_from_model.py | 151 ++++++++----------
.../utils/get_model_instance_from_graph.py | 42 ++---
2 files changed, 83 insertions(+), 110 deletions(-)
diff --git a/cognee/modules/graph/utils/get_graph_from_model.py b/cognee/modules/graph/utils/get_graph_from_model.py
index 770e63d05..29137ddc7 100644
--- a/cognee/modules/graph/utils/get_graph_from_model.py
+++ b/cognee/modules/graph/utils/get_graph_from_model.py
@@ -1,16 +1,8 @@
from datetime import datetime, timezone
-
from cognee.infrastructure.engine import DataPoint
from cognee.modules.storage.utils import copy_model
-
-def get_graph_from_model(data_point: DataPoint, added_nodes=None, added_edges=None):
-
- if not added_nodes:
- added_nodes = {}
- if not added_edges:
- added_edges = {}
-
+def get_graph_from_model(data_point: DataPoint, include_root = True, added_nodes = {}, added_edges = {}):
nodes = []
edges = []
@@ -20,92 +12,85 @@ def get_graph_from_model(data_point: DataPoint, added_nodes=None, added_edges=No
for field_name, field_value in data_point:
if field_name == "_metadata":
continue
- elif isinstance(field_value, DataPoint):
- excluded_properties.add(field_name)
- nodes, edges, added_nodes, added_edges = add_nodes_and_edges(
- data_point,
- field_name,
- field_value,
- nodes,
- edges,
- added_nodes,
- added_edges,
- )
- elif (
- isinstance(field_value, list)
- and len(field_value) > 0
- and isinstance(field_value[0], DataPoint)
- ):
+ if isinstance(field_value, DataPoint):
excluded_properties.add(field_name)
- for item in field_value:
- n_edges_before = len(edges)
- nodes, edges, added_nodes, added_edges = add_nodes_and_edges(
- data_point, field_name, item, nodes, edges, added_nodes, added_edges
- )
- edges = edges[:n_edges_before] + [
- (*edge[:3], {**edge[3], "metadata": {"type": "list"}})
- for edge in edges[n_edges_before:]
- ]
- else:
- data_point_properties[field_name] = field_value
+ property_nodes, property_edges = get_graph_from_model(field_value, True, added_nodes, added_edges)
- SimpleDataPointModel = copy_model(
- type(data_point),
- include_fields={
- "_metadata": (dict, data_point._metadata),
- },
- exclude_fields=excluded_properties,
- )
+ for node in property_nodes:
+ if str(node.id) not in added_nodes:
+ nodes.append(node)
+ added_nodes[str(node.id)] = True
- nodes.append(SimpleDataPointModel(**data_point_properties))
+ for edge in property_edges:
+ edge_key = str(edge[0]) + str(edge[1]) + edge[2]
- return nodes, edges
+ if str(edge_key) not in added_edges:
+ edges.append(edge)
+ added_edges[str(edge_key)] = True
+ for property_node in get_own_properties(property_nodes, property_edges):
+ edge_key = str(data_point.id) + str(property_node.id) + field_name
-def add_nodes_and_edges(
- data_point, field_name, field_value, nodes, edges, added_nodes, added_edges
-):
-
- property_nodes, property_edges = get_graph_from_model(
- field_value, dict(added_nodes), dict(added_edges)
- )
-
- for node in property_nodes:
- if str(node.id) not in added_nodes:
- nodes.append(node)
- added_nodes[str(node.id)] = True
-
- for edge in property_edges:
- edge_key = str(edge[0]) + str(edge[1]) + edge[2]
-
- if str(edge_key) not in added_edges:
- edges.append(edge)
- added_edges[str(edge_key)] = True
-
- for property_node in get_own_properties(property_nodes, property_edges):
- edge_key = str(data_point.id) + str(property_node.id) + field_name
-
- if str(edge_key) not in added_edges:
- edges.append(
- (
- data_point.id,
- property_node.id,
- field_name,
- {
+ if str(edge_key) not in added_edges:
+ edges.append((data_point.id, property_node.id, field_name, {
"source_node_id": data_point.id,
"target_node_id": property_node.id,
"relationship_name": field_name,
- "updated_at": datetime.now(timezone.utc).strftime(
- "%Y-%m-%d %H:%M:%S"
- ),
- },
- )
- )
- added_edges[str(edge_key)] = True
+ "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
+ }))
+ added_edges[str(edge_key)] = True
+ continue
- return (nodes, edges, added_nodes, added_edges)
+ if isinstance(field_value, list) and len(field_value) > 0 and isinstance(field_value[0], DataPoint):
+ excluded_properties.add(field_name)
+
+ for item in field_value:
+ property_nodes, property_edges = get_graph_from_model(item, True, added_nodes, added_edges)
+
+ for node in property_nodes:
+ if str(node.id) not in added_nodes:
+ nodes.append(node)
+ added_nodes[str(node.id)] = True
+
+ for edge in property_edges:
+ edge_key = str(edge[0]) + str(edge[1]) + edge[2]
+
+ if str(edge_key) not in added_edges:
+ edges.append(edge)
+ added_edges[edge_key] = True
+
+ for property_node in get_own_properties(property_nodes, property_edges):
+ edge_key = str(data_point.id) + str(property_node.id) + field_name
+
+ if str(edge_key) not in added_edges:
+ edges.append((data_point.id, property_node.id, field_name, {
+ "source_node_id": data_point.id,
+ "target_node_id": property_node.id,
+ "relationship_name": field_name,
+ "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
+ "metadata": {
+ "type": "list"
+ },
+ }))
+ added_edges[edge_key] = True
+ continue
+
+ data_point_properties[field_name] = field_value
+
+ SimpleDataPointModel = copy_model(
+ type(data_point),
+ include_fields = {
+ "_metadata": (dict, data_point._metadata),
+ },
+ exclude_fields = excluded_properties,
+ )
+
+ if include_root:
+ nodes.append(SimpleDataPointModel(**data_point_properties))
+
+ return nodes, edges
def get_own_properties(property_nodes, property_edges):
diff --git a/cognee/modules/graph/utils/get_model_instance_from_graph.py b/cognee/modules/graph/utils/get_model_instance_from_graph.py
index 16658d743..82cdfa150 100644
--- a/cognee/modules/graph/utils/get_model_instance_from_graph.py
+++ b/cognee/modules/graph/utils/get_model_instance_from_graph.py
@@ -1,41 +1,29 @@
-from typing import Callable
-
from pydantic_core import PydanticUndefined
-
from cognee.infrastructure.engine import DataPoint
from cognee.modules.storage.utils import copy_model
-def get_model_instance_from_graph(
- nodes: list[DataPoint],
- edges: list[tuple[str, str, str, dict[str, str]]],
- entity_id: str,
-):
- node_map = {node.id: node for node in nodes}
+def get_model_instance_from_graph(nodes: list[DataPoint], edges: list, entity_id: str):
+ node_map = {}
- for source_node_id, target_node_id, edge_label, edge_properties in edges:
- source_node = node_map[source_node_id]
- target_node = node_map[target_node_id]
+ for node in nodes:
+ node_map[node.id] = node
+
+ for edge in edges:
+ source_node = node_map[edge[0]]
+ target_node = node_map[edge[1]]
+ edge_label = edge[2]
+ edge_properties = edge[3] if len(edge) == 4 else {}
edge_metadata = edge_properties.get("metadata", {})
- edge_type = edge_metadata.get("type", "default")
+ edge_type = edge_metadata.get("type")
if edge_type == "list":
- NewModel = copy_model(
- type(source_node),
- {edge_label: (list[type(target_node)], PydanticUndefined)},
- )
- source_node_dict = source_node.model_dump()
- source_node_edge_label_values = source_node_dict.get(edge_label, [])
- source_node_dict[edge_label] = source_node_edge_label_values + [target_node]
+ NewModel = copy_model(type(source_node), { edge_label: (list[type(target_node)], PydanticUndefined) })
- node_map[source_node_id] = NewModel(**source_node_dict)
+ node_map[edge[0]] = NewModel(**source_node.model_dump(), **{ edge_label: [target_node] })
else:
- NewModel = copy_model(
- type(source_node), {edge_label: (type(target_node), PydanticUndefined)}
- )
+ NewModel = copy_model(type(source_node), { edge_label: (type(target_node), PydanticUndefined) })
- node_map[target_node_id] = NewModel(
- **source_node.model_dump(), **{edge_label: target_node}
- )
+ node_map[edge[0]] = NewModel(**source_node.model_dump(), **{ edge_label: target_node })
return node_map[entity_id]
From f65070087fcc0c1f7e6add9f9df545ca9f66b7b1 Mon Sep 17 00:00:00 2001
From: Ryan Lin
Date: Tue, 3 Dec 2024 03:40:28 -0500
Subject: [PATCH 13/23] Feature: Integrate Milvus as the VectorDatabase
---
.../databases/vector/create_vector_engine.py | 43 ++-
.../databases/vector/milvus/MilvusAdapter.py | 245 ++++++++++++++++++
.../databases/vector/milvus/__init__.py | 1 +
cognee/tests/test_milvus.py | 76 ++++++
poetry.lock | 138 +++++++++-
pyproject.toml | 1 +
6 files changed, 486 insertions(+), 18 deletions(-)
create mode 100644 cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
create mode 100644 cognee/infrastructure/databases/vector/milvus/__init__.py
create mode 100644 cognee/tests/test_milvus.py
diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py
index 4b4799ee7..5dda755f6 100644
--- a/cognee/infrastructure/databases/vector/create_vector_engine.py
+++ b/cognee/infrastructure/databases/vector/create_vector_engine.py
@@ -1,11 +1,13 @@
from typing import Dict
+
class VectorConfig(Dict):
vector_db_url: str
vector_db_port: str
vector_db_key: str
vector_db_provider: str
+
def create_vector_engine(config: VectorConfig, embedding_engine):
if config["vector_db_provider"] == "weaviate":
from .weaviate_db import WeaviateAdapter
@@ -16,24 +18,37 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
return WeaviateAdapter(
config["vector_db_url"],
config["vector_db_key"],
- embedding_engine = embedding_engine
+ embedding_engine=embedding_engine
)
elif config["vector_db_provider"] == "qdrant":
if not (config["vector_db_url"] and config["vector_db_key"]):
raise EnvironmentError("Missing requred Qdrant credentials!")
-
+
from .qdrant.QDrantAdapter import QDrantAdapter
return QDrantAdapter(
- url = config["vector_db_url"],
- api_key = config["vector_db_key"],
- embedding_engine = embedding_engine
+ url=config["vector_db_url"],
+ api_key=config["vector_db_key"],
+ embedding_engine=embedding_engine
)
+ elif config['vector_db_provider'] == 'milvus':
+ from .milvus.MilvusAdapter import MilvusAdapter
+
+ if not config["vector_db_url"]:
+ raise EnvironmentError("Missing required Milvus credentials!")
+
+ return MilvusAdapter(
+ url=config["vector_db_url"],
+ api_key=config['vector_db_key'],
+ embedding_engine=embedding_engine
+ )
+
+
elif config["vector_db_provider"] == "pgvector":
from cognee.infrastructure.databases.relational import get_relational_config
-
+
# Get configuration for postgres database
relational_config = get_relational_config()
db_username = relational_config.db_username
@@ -52,8 +67,8 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
from .pgvector.PGVectorAdapter import PGVectorAdapter
return PGVectorAdapter(
- connection_string,
- config["vector_db_key"],
+ connection_string,
+ config["vector_db_key"],
embedding_engine,
)
@@ -64,16 +79,16 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
return FalkorDBAdapter(
- database_url = config["vector_db_url"],
- database_port = config["vector_db_port"],
- embedding_engine = embedding_engine,
+ database_url=config["vector_db_url"],
+ database_port=config["vector_db_port"],
+ embedding_engine=embedding_engine,
)
else:
from .lancedb.LanceDBAdapter import LanceDBAdapter
return LanceDBAdapter(
- url = config["vector_db_url"],
- api_key = config["vector_db_key"],
- embedding_engine = embedding_engine,
+ url=config["vector_db_url"],
+ api_key=config["vector_db_key"],
+ embedding_engine=embedding_engine,
)
diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
new file mode 100644
index 000000000..bfc0bbd18
--- /dev/null
+++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
@@ -0,0 +1,245 @@
+import asyncio
+import logging
+from typing import List, Optional
+from uuid import UUID
+from cognee.infrastructure.engine import DataPoint
+from ..vector_db_interface import VectorDBInterface
+from ..models.ScoredResult import ScoredResult
+from ..embeddings.EmbeddingEngine import EmbeddingEngine
+from pymilvus import MilvusClient
+
+logger = logging.getLogger("MilvusAdapter")
+
+
class IndexSchema(DataPoint):
    """Data point schema for Milvus text indexes: a single embeddable `text` field."""
    text: str

    # Declares which fields the embedding layer should index; "text" is the
    # only embeddable field of this schema.
    _metadata: dict = {
        "index_fields": ["text"]
    }
+
class MilvusAdapter(VectorDBInterface):
    """Cognee vector-store adapter backed by Milvus.

    Wraps the synchronous ``pymilvus.MilvusClient``; the async methods call
    the blocking client directly (no thread off-loading).
    """
    name = "Milvus"
    url: str
    api_key: Optional[str]
    embedding_engine: EmbeddingEngine = None

    def __init__(self, url: str, api_key: Optional[str], embedding_engine: EmbeddingEngine):
        self.url = url
        self.api_key = api_key

        self.embedding_engine = embedding_engine

    def get_milvus_client(self) -> MilvusClient:
        """Create a fresh client; pass credentials only when actually set.

        Truthiness check (not ``is not None``): an empty-string api_key —
        the default when no VECTOR_DB_KEY is configured — must not be sent
        as a token, or the connection fails.
        """
        if self.api_key:
            return MilvusClient(uri=self.url, token=self.api_key)
        return MilvusClient(uri=self.url)

    async def embed_data(self, data: List[str]) -> list[list[float]]:
        """Embed a batch of texts with the configured embedding engine."""
        return await self.embedding_engine.embed_text(data)

    async def has_collection(self, collection_name: str) -> bool:
        """Return True if the named collection exists."""
        # MilvusClient is synchronous; the previous Future indirection added
        # nothing and has been removed.
        client = self.get_milvus_client()
        return client.has_collection(collection_name=collection_name)

    async def create_collection(
        self,
        collection_name: str,
        payload_schema=None,
    ):
        """Create and load a collection with id/vector/text fields.

        Returns True on success or when the collection already exists.
        Raises MilvusException on failure. ``payload_schema`` is accepted
        for interface compatibility but unused here.
        """
        from pymilvus import DataType, MilvusException
        client = self.get_milvus_client()
        if client.has_collection(collection_name=collection_name):
            logger.info(f"Collection '{collection_name}' already exists.")
            return True

        try:
            dimension = self.embedding_engine.get_vector_size()
            assert dimension > 0, "Embedding dimension must be greater than 0."

            schema = client.create_schema(
                auto_id=False,
                enable_dynamic_field=False,
            )

            # Primary key is a stringified UUID (exactly 36 characters).
            schema.add_field(
                field_name="id",
                datatype=DataType.VARCHAR,
                is_primary=True,
                max_length=36
            )

            schema.add_field(
                field_name="vector",
                datatype=DataType.FLOAT_VECTOR,
                dim=dimension
            )

            # 65535 is the Milvus VARCHAR maximum length (was mistyped 60535).
            schema.add_field(
                field_name="text",
                datatype=DataType.VARCHAR,
                max_length=65535
            )

            index_params = client.prepare_index_params()
            index_params.add_index(
                field_name="vector",
                metric_type="COSINE"
            )

            client.create_collection(
                collection_name=collection_name,
                schema=schema,
                index_params=index_params
            )

            # Load so the collection is immediately searchable.
            client.load_collection(collection_name)

            logger.info(f"Collection '{collection_name}' created successfully.")
            return True
        except MilvusException as e:
            logger.error(f"Error creating collection '{collection_name}': {str(e)}")
            raise e

    async def create_data_points(
        self,
        collection_name: str,
        data_points: List[DataPoint]
    ):
        """Embed and insert data points; returns the Milvus insert result.

        Raises MilvusException if the insert fails.
        """
        from pymilvus import MilvusException
        client = self.get_milvus_client()
        data_vectors = await self.embed_data(
            [data_point.get_embeddable_data() for data_point in data_points]
        )

        insert_data = [
            {
                "id": str(data_point.id),
                "vector": data_vectors[index],
                "text": data_point.text,
            }
            for index, data_point in enumerate(data_points)
        ]

        try:
            result = client.insert(
                collection_name=collection_name,
                data=insert_data
            )
            logger.info(
                f"Inserted {result.get('insert_count', 0)} data points into collection '{collection_name}'."
            )
            return result
        except MilvusException as e:
            logger.error(f"Error inserting data points into collection '{collection_name}': {str(e)}")
            raise e

    async def create_vector_index(self, index_name: str, index_property_name: str):
        """Create the backing collection for an (index, property) pair."""
        await self.create_collection(f"{index_name}_{index_property_name}")

    async def index_data_points(self, index_name: str, index_property_name: str, data_points: List[DataPoint]):
        """Project each data point to its indexed text field and insert it."""
        formatted_data_points = [
            IndexSchema(
                id=data_point.id,
                text=getattr(data_point, data_point._metadata["index_fields"][0]),
            )
            for data_point in data_points
        ]
        collection_name = f"{index_name}_{index_property_name}"
        await self.create_data_points(collection_name, formatted_data_points)

    async def retrieve(self, collection_name: str, data_point_ids: list[str]):
        """Fetch data points by id; returns raw Milvus query rows."""
        from pymilvus import MilvusException
        client = self.get_milvus_client()
        try:
            filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]"""

            # MilvusClient.query takes `filter` (the legacy ORM kwarg `expr`
            # is not part of this API) — matches delete_data_points below.
            results = client.query(
                collection_name=collection_name,
                filter=filter_expression,
                output_fields=["*"],
            )
            return results
        except MilvusException as e:
            logger.error(f"Error retrieving data points from collection '{collection_name}': {str(e)}")
            raise e

    async def search(
        self,
        collection_name: str,
        query_text: Optional[str] = None,
        query_vector: Optional[List[float]] = None,
        limit: int = 5,
        with_vector: bool = False,
    ):
        """Cosine-similarity search; returns ScoredResult objects.

        One of ``query_text`` or ``query_vector`` is required; text is
        embedded on the fly when no vector is supplied.
        """
        from pymilvus import MilvusException
        client = self.get_milvus_client()
        if query_text is None and query_vector is None:
            raise ValueError("One of query_text or query_vector must be provided!")

        try:
            query_vector = query_vector or (await self.embed_data([query_text]))[0]

            output_fields = ["id", "text"]
            if with_vector:
                output_fields.append("vector")

            results = client.search(
                collection_name=collection_name,
                data=[query_vector],
                anns_field="vector",
                limit=limit,
                output_fields=output_fields,
                search_params={
                    "metric_type": "COSINE",
                },
            )

            return [
                ScoredResult(
                    id=UUID(result["id"]),
                    score=result["distance"],
                    payload=result.get("entity", {}),
                )
                for result in results[0]
            ]
        except MilvusException as e:
            logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
            raise e

    async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False):
        """Embed all query texts, then run one search per embedding."""
        def query_search(query_vector):
            return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors)

        return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)]

    async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
        """Delete data points by id; returns the Milvus delete result."""
        from pymilvus import MilvusException
        client = self.get_milvus_client()
        try:
            filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]"""

            delete_result = client.delete(
                collection_name=collection_name,
                filter=filter_expression
            )

            logger.info(f"Deleted data points with IDs {data_point_ids} from collection '{collection_name}'.")
            return delete_result
        except MilvusException as e:
            logger.error(f"Error deleting data points from collection '{collection_name}': {str(e)}")
            raise e

    async def prune(self):
        """Drop every collection in the database, then close the client."""
        client = self.get_milvus_client()
        if client:
            collections = client.list_collections()
            for collection_name in collections:
                client.drop_collection(collection_name=collection_name)
            client.close()
diff --git a/cognee/infrastructure/databases/vector/milvus/__init__.py b/cognee/infrastructure/databases/vector/milvus/__init__.py
new file mode 100644
index 000000000..ecb3cb14b
--- /dev/null
+++ b/cognee/infrastructure/databases/vector/milvus/__init__.py
@@ -0,0 +1 @@
+from .MilvusAdapter import MilvusAdapter
diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py
new file mode 100644
index 000000000..d565f6446
--- /dev/null
+++ b/cognee/tests/test_milvus.py
@@ -0,0 +1,76 @@
+import os
+import logging
+import pathlib
+import cognee
+from cognee.api.v1.search import SearchType
+
+logging.basicConfig(level=logging.DEBUG)
+
+
async def main():
    """End-to-end integration test of the cognee pipeline against Milvus.

    Requires a configured LLM provider and the `milvus` extra installed;
    exercises add -> cognify -> search -> prune against an isolated
    data/system directory pair.
    """
    # Point cognee at Milvus and isolate this test's storage directories.
    cognee.config.set_vector_db_provider("milvus")
    data_directory_path = str(
        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_milvus")).resolve())
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve())
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate in case a previous run left state behind.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name = "cs_explanations"

    # Ingest one file plus one inline text into the same dataset.
    explanation_file_path = os.path.join(pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt")
    await cognee.add([explanation_file_path], dataset_name)

    text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
    At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
    Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
    """

    await cognee.add([text], dataset_name)

    # Build the knowledge graph / vector indexes over the dataset.
    await cognee.cognify([dataset_name])

    # Pull an arbitrary entity back out of the vector store to seed searches.
    from cognee.infrastructure.databases.vector import get_vector_engine
    vector_engine = get_vector_engine()
    random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
    random_node_name = random_node.payload["text"]

    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted INSIGHTS are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted CHUNKS are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
    assert len(search_results) != 0, "The search results list is empty."
    print("\nExtracted SUMMARIES are:\n")
    for result in search_results:
        print(f"{result}\n")

    # NOTE(review): expects exactly 6 history entries after the 3 searches
    # above — presumably two entries per search; confirm against
    # get_search_history's contract.
    history = await cognee.get_search_history()
    assert len(history) == 6, "Search history is not correct."

    # Pruning data should remove the local data directory entirely.
    await cognee.prune.prune_data()
    assert not os.path.isdir(data_directory_path), "Local data files are not deleted"

    # Pruning the system should leave Milvus with zero collections.
    await cognee.prune.prune_system(metadata=True)
    milvus_client = get_vector_engine().get_milvus_client()
    collections = milvus_client.list_collections()
    assert len(collections) == 0, "Milvus vector database is not empty"


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
diff --git a/poetry.lock b/poetry.lock
index 7d09c340d..4b8262648 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
[[package]]
name = "aiofiles"
@@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5"
name = "grpcio"
version = "1.67.1"
description = "HTTP/2-based RPC framework"
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
@@ -2751,6 +2751,8 @@ optional = false
python-versions = "*"
files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
+ {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
+ {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
]
[package.dependencies]
@@ -3602,6 +3604,22 @@ files = [
{file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
]
+[[package]]
+name = "milvus-lite"
+version = "2.4.10"
+description = "A lightweight version of Milvus wrapped with Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"},
+ {file = "milvus_lite-2.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74a8e07c5e3b057df17fbb46913388e84df1dc403a200f4e423799a58184c800"},
+ {file = "milvus_lite-2.4.10-py3-none-manylinux2014_aarch64.whl", hash = "sha256:240c7386b747bad696ecb5bd1f58d491e86b9d4b92dccee3315ed7256256eddc"},
+ {file = "milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl", hash = "sha256:211d2e334a043f9282bdd9755f76b9b2d93b23bffa7af240919ffce6a8dfe325"},
+]
+
+[package.dependencies]
+tqdm = "*"
+
[[package]]
name = "mistune"
version = "3.0.2"
@@ -4938,7 +4956,7 @@ files = [
name = "protobuf"
version = "5.28.3"
description = ""
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"},
@@ -5360,6 +5378,31 @@ pyyaml = "*"
[package.extras]
extra = ["pygments (>=2.12)"]
+[[package]]
+name = "pymilvus"
+version = "2.5.0"
+description = "Python Sdk for Milvus"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"},
+ {file = "pymilvus-2.5.0.tar.gz", hash = "sha256:4da14a3bd957a4921166f9355fd1f1ac5c5e4e80b46f12f64d9c9a6dcb8cb395"},
+]
+
+[package.dependencies]
+grpcio = ">=1.49.1,<=1.67.1"
+milvus-lite = {version = ">=2.4.0", markers = "sys_platform != \"win32\""}
+pandas = ">=1.2.4"
+protobuf = ">=3.20.0"
+python-dotenv = ">=1.0.1,<2.0.0"
+setuptools = ">69"
+ujson = ">=2.0.0"
+
+[package.extras]
+bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"]
+dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"]
+model = ["milvus-model (>=0.1.0)"]
+
[[package]]
name = "pyparsing"
version = "3.2.0"
@@ -7075,6 +7118,93 @@ files = [
{file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"},
]
+[[package]]
+name = "ujson"
+version = "5.10.0"
+description = "Ultra fast JSON encoder and decoder for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"},
+ {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"},
+ {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"},
+ {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"},
+ {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"},
+ {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"},
+ {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"},
+ {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"},
+ {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"},
+ {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"},
+ {file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"},
+ {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"},
+ {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"},
+ {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"},
+ {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"},
+ {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"},
+ {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"},
+ {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"},
+ {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"},
+ {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"},
+ {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"},
+ {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"},
+ {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"},
+ {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"},
+ {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"},
+ {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"},
+ {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"},
+ {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"},
+ {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"},
+ {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"},
+ {file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"},
+ {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"},
+ {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"},
+ {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"},
+ {file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"},
+ {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"},
+ {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"},
+ {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"},
+ {file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"},
+ {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"},
+ {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"},
+ {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"},
+ {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"},
+ {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"},
+ {file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"},
+ {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"},
+ {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"},
+ {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"},
+ {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"},
+ {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"},
+ {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"},
+ {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"},
+ {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"},
+ {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"},
+ {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"},
+ {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"},
+ {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"},
+ {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"},
+ {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"},
+ {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"},
+ {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"},
+ {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"},
+ {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"},
+ {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"},
+ {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"},
+ {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"},
+ {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"},
+ {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"},
+]
+
[[package]]
name = "uri-template"
version = "1.3.0"
@@ -7645,4 +7775,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376"
+content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3"
diff --git a/pyproject.toml b/pyproject.toml
index 6fac2597c..44ca875a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,7 @@ asyncpg = {version = "0.30.0", optional = true}
pgvector = {version = "^0.3.5", optional = true}
psycopg2 = {version = "^2.9.10", optional = true}
llama-index-core = {version = "^0.11.22", optional = true}
+pymilvus = "^2.5.0"
[tool.poetry.extras]
filesystem = ["s3fs", "botocore"]
From fb5f0cf00fdc1dff830531594c8e6c79504a2bdc Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 10:37:50 +0100
Subject: [PATCH 14/23] chore: Make milvus an optional dependency
Make Milvus an optional dependency, expand docs with Milvus information
Chore
---
.env.template | 2 +-
README.md | 12 ++++++++++++
pyproject.toml | 4 ++--
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/.env.template b/.env.template
index ff591c0a7..75a57de4d 100644
--- a/.env.template
+++ b/.env.template
@@ -14,7 +14,7 @@ GRAPH_DATABASE_URL=
GRAPH_DATABASE_USERNAME=
GRAPH_DATABASE_PASSWORD=
-# "qdrant", "pgvector", "weaviate" or "lancedb"
+# "qdrant", "pgvector", "weaviate", "milvus" or "lancedb"
VECTOR_DB_PROVIDER="lancedb"
# Not needed if using "lancedb" or "pgvector"
VECTOR_DB_URL=
diff --git a/README.md b/README.md
index 2b29f1448..efb6e23ba 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,12 @@ pip install 'cognee[qdrant]'
pip install 'cognee[neo4j]'
```
+### With pip with Milvus support
+
+```bash
+pip install 'cognee[milvus]'
+```
+
### With poetry
```bash
@@ -83,6 +89,12 @@ poetry add cognee -E qdrant
poetry add cognee -E neo4j
```
+### With poetry with Milvus support
+
+```bash
+poetry add cognee -E milvus
+```
+
## 💻 Basic Usage
diff --git a/pyproject.toml b/pyproject.toml
index 44ca875a6..c66b23c89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ asyncpg = {version = "0.30.0", optional = true}
pgvector = {version = "^0.3.5", optional = true}
psycopg2 = {version = "^2.9.10", optional = true}
llama-index-core = {version = "^0.11.22", optional = true}
-pymilvus = "^2.5.0"
+pymilvus = {version = "^2.5.0", optional = true}
[tool.poetry.extras]
filesystem = ["s3fs", "botocore"]
@@ -85,7 +85,7 @@ posthog = ["posthog"]
falkordb = ["falkordb"]
groq = ["groq"]
langfuse = ["langfuse"]
-
+milvus = ["pymilvus"]
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
From 764c0895dfb884e1e054942d6d8af76b5ab57c16 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 11:13:54 +0100
Subject: [PATCH 15/23] fix: Resolve Milvus connection issue, add config to
milvus test, add milvus gh action
Resolved if statement resolution issue regarding api key,
Added vector db config to milvus test,
Added milvus gh action
Fix
---
.github/workflows/test_milvus.yml | 64 +++++++++++++++++++
.../databases/vector/milvus/MilvusAdapter.py | 8 ++-
cognee/tests/test_milvus.py | 8 +++
poetry.lock | 17 +++--
4 files changed, 85 insertions(+), 12 deletions(-)
create mode 100644 .github/workflows/test_milvus.yml
diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml
new file mode 100644
index 000000000..2cfd88993
--- /dev/null
+++ b/.github/workflows/test_milvus.yml
@@ -0,0 +1,64 @@
+name: test | milvus
+
+on:
+ workflow_dispatch:
+ pull_request:
+ branches:
+ - main
+ types: [labeled, synchronize]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ RUNTIME__LOG_LEVEL: ERROR
+ ENV: 'dev'
+
+jobs:
+ get_docs_changes:
+ name: docs changes
+ uses: ./.github/workflows/get_docs_changes.yml
+
+ run_milvus:
+ name: test
+ needs: get_docs_changes
+ if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ defaults:
+ run:
+ shell: bash
+
+ steps:
+ - name: Check out
+ uses: actions/checkout@master
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11.x'
+
+ - name: Install Poetry
+ # https://github.com/snok/install-poetry#running-on-windows
+ uses: snok/install-poetry@v1.3.2
+ with:
+ virtualenvs-create: true
+ virtualenvs-in-project: true
+ installer-parallel: true
+
+ - name: Install dependencies
+ run: poetry install -E milvus --no-interaction
+
+ - name: Run default basic pipeline
+ env:
+ ENV: 'dev'
+ LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ run: poetry run python ./cognee/tests/test_milvus.py
+
+ - name: Clean up disk space
+ run: |
+ sudo rm -rf ~/.cache
+ sudo rm -rf /tmp/*
+ df -h
diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
index bfc0bbd18..d3774542a 100644
--- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
+++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import asyncio
import logging
from typing import List, Optional
@@ -6,7 +8,6 @@ from cognee.infrastructure.engine import DataPoint
from ..vector_db_interface import VectorDBInterface
from ..models.ScoredResult import ScoredResult
from ..embeddings.EmbeddingEngine import EmbeddingEngine
-from pymilvus import MilvusClient
logger = logging.getLogger("MilvusAdapter")
@@ -31,8 +32,9 @@ class MilvusAdapter(VectorDBInterface):
self.embedding_engine = embedding_engine
- def get_milvus_client(self) -> MilvusClient:
- if self.api_key is not None:
+ def get_milvus_client(self) -> "MilvusClient":
+ from pymilvus import MilvusClient
+ if self.api_key:
client = MilvusClient(uri=self.url, token=self.api_key)
else:
client = MilvusClient(uri=self.url)
diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py
index d565f6446..b32d3590b 100644
--- a/cognee/tests/test_milvus.py
+++ b/cognee/tests/test_milvus.py
@@ -16,6 +16,14 @@ async def main():
pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve())
cognee.config.system_root_directory(cognee_directory_path)
+ cognee.config.set_vector_db_config(
+ {
+ "vector_db_url": os.path.join(cognee_directory_path, "databases/milvus.db"),
+ "vector_db_key": "",
+ "vector_db_provider": "milvus"
+ }
+ )
+
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
diff --git a/poetry.lock b/poetry.lock
index 4b8262648..3611dacea 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "aiofiles"
@@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5"
name = "grpcio"
version = "1.67.1"
description = "HTTP/2-based RPC framework"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
@@ -2751,8 +2751,6 @@ optional = false
python-versions = "*"
files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
- {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
- {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
]
[package.dependencies]
@@ -3608,7 +3606,7 @@ files = [
name = "milvus-lite"
version = "2.4.10"
description = "A lightweight version of Milvus wrapped with Python."
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"},
@@ -4956,7 +4954,7 @@ files = [
name = "protobuf"
version = "5.28.3"
description = ""
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"},
@@ -5382,7 +5380,7 @@ extra = ["pygments (>=2.12)"]
name = "pymilvus"
version = "2.5.0"
description = "Python Sdk for Milvus"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"},
@@ -7122,7 +7120,7 @@ files = [
name = "ujson"
version = "5.10.0"
description = "Ultra fast JSON encoder and decoder for Python"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"},
@@ -7765,6 +7763,7 @@ groq = ["groq"]
langchain = ["langchain_text_splitters", "langsmith"]
langfuse = ["langfuse"]
llama-index = ["llama-index-core"]
+milvus = ["pymilvus"]
neo4j = ["neo4j"]
notebook = []
postgres = ["asyncpg", "pgvector", "psycopg2"]
@@ -7775,4 +7774,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3"
+content-hash = "d6b10b74a910202f224ff34fa06ad3d2767796a6492a96724de0d608ac0356c5"
From c301498da055ce3f93020c249756d78d0f1dd24b Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 11:58:34 +0100
Subject: [PATCH 16/23] fix: Fix batch search function
Rewrite batch search to work as async gather
Fix
---
.../databases/vector/milvus/MilvusAdapter.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
index d3774542a..84beb7273 100644
--- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
+++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
@@ -216,10 +216,15 @@ class MilvusAdapter(VectorDBInterface):
raise e
async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False):
- def query_search(query_vector):
- return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors)
+ query_vectors = await self.embed_data(query_texts)
- return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)]
+ return await asyncio.gather(
+ *[self.search(collection_name=collection_name,
+ query_vector=query_vector,
+ limit=limit,
+ with_vector=with_vectors,
+ ) for query_vector in query_vectors]
+ )
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
from pymilvus import MilvusException
From e462ebe2e67f7368b1d59b826df4a4fba686ec1a Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 13:38:07 +0100
Subject: [PATCH 17/23] docs: Update README.md with stable databases
Update README.md with state of stable databases
Docs
---
README.md | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index efb6e23ba..e57b498ab 100644
--- a/README.md
+++ b/README.md
@@ -368,12 +368,13 @@ pip install cognee
}
-| Name | Type | Current state | Known Issues |
-|------------------|--------------------|-------------------|---------------------------------------|
-| Qdrant | Vector | Stable ✅ | |
-| Weaviate | Vector | Stable ✅ | |
-| LanceDB | Vector | Stable ✅ | |
-| Neo4j | Graph | Stable ✅ | |
-| NetworkX | Graph | Stable ✅ | |
-| FalkorDB | Vector/Graph | Unstable ❌ | |
-| PGVector | Vector | Unstable ❌ | Postgres DB returns the Timeout error |
+| Name | Type | Current state | Known Issues |
+|----------|--------------------|-------------------|--------------|
+| Qdrant | Vector | Stable ✅ | |
+| Weaviate | Vector | Stable ✅ | |
+| LanceDB | Vector | Stable ✅ | |
+| Neo4j | Graph | Stable ✅ | |
+| NetworkX | Graph | Stable ✅ | |
+| FalkorDB | Vector/Graph | Unstable ❌ | |
+| PGVector | Vector | Stable ✅ | |
+| Milvus | Vector | Stable ✅ | |
\ No newline at end of file
From 18e0aa2f6c30c3bb89b21b35b75c1d577f4569e6 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:20:29 +0100
Subject: [PATCH 18/23] fix: deletes get_graph_from_model test of the faulty
old implementation
---
.../graph/get_graph_from_model_test.py | 89 -------------------
1 file changed, 89 deletions(-)
delete mode 100644 cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py
diff --git a/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py b/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py
deleted file mode 100644
index e56a2dff2..000000000
--- a/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from cognee.modules.graph.utils import get_graph_from_model
-from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth
-
-CAR_SEDAN_EDGE = (
- "car1",
- "sedan",
- "is_type",
- {
- "source_node_id": "car1",
- "target_node_id": "sedan",
- "relationship_name": "is_type",
- },
-)
-
-
-BORIS_CAR_EDGE_GROUND_TRUTH = (
- "boris",
- "car1",
- "owns_car",
- {
- "source_node_id": "boris",
- "target_node_id": "car1",
- "relationship_name": "owns_car",
- "metadata": {"type": "list"},
- },
-)
-
-CAR_TYPE_GROUND_TRUTH = {"id": "sedan"}
-
-CAR_GROUND_TRUTH = {
- "id": "car1",
- "brand": "Toyota",
- "model": "Camry",
- "year": 2020,
- "color": "Blue",
-}
-
-PERSON_GROUND_TRUTH = {
- "id": "boris",
- "name": "Boris",
- "age": 30,
- "driving_license": {
- "issued_by": "PU Vrsac",
- "issued_on": "2025-11-06",
- "number": "1234567890",
- "expires_on": "2025-11-06",
- },
-}
-
-
-def test_extracted_car_type(boris):
- nodes, _ = get_graph_from_model(boris)
- assert len(nodes) == 3
- car_type = nodes[0]
- run_test_against_ground_truth("car_type", car_type, CAR_TYPE_GROUND_TRUTH)
-
-
-def test_extracted_car(boris):
- nodes, _ = get_graph_from_model(boris)
- assert len(nodes) == 3
- car = nodes[1]
- run_test_against_ground_truth("car", car, CAR_GROUND_TRUTH)
-
-
-def test_extracted_person(boris):
- nodes, _ = get_graph_from_model(boris)
- assert len(nodes) == 3
- person = nodes[2]
- run_test_against_ground_truth("person", person, PERSON_GROUND_TRUTH)
-
-
-def test_extracted_car_sedan_edge(boris):
- _, edges = get_graph_from_model(boris)
- edge = edges[0]
-
- assert CAR_SEDAN_EDGE[:3] == edge[:3], f"{CAR_SEDAN_EDGE[:3] = } != {edge[:3] = }"
- for key, ground_truth in CAR_SEDAN_EDGE[3].items():
- assert ground_truth == edge[3][key], f"{ground_truth = } != {edge[3][key] = }"
-
-
-def test_extracted_boris_car_edge(boris):
- _, edges = get_graph_from_model(boris)
- edge = edges[1]
-
- assert (
- BORIS_CAR_EDGE_GROUND_TRUTH[:3] == edge[:3]
- ), f"{BORIS_CAR_EDGE_GROUND_TRUTH[:3] = } != {edge[:3] = }"
- for key, ground_truth in BORIS_CAR_EDGE_GROUND_TRUTH[3].items():
- assert ground_truth == edge[3][key], f"{ground_truth = } != {edge[3][key] = }"
From 688c3dfdb7094340269b5a221ca374d67b5778d2 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:33:46 +0100
Subject: [PATCH 19/23] fix: deletes tests that were implemented for the faulty
get_model_from_graph method
---
.../get_graph_from_model_generative_test.py | 37 -------------------
...del_instance_from_graph_generative_test.py | 33 -----------------
.../get_model_instance_from_graph_test.py | 35 ------------------
3 files changed, 105 deletions(-)
delete mode 100644 cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py
delete mode 100644 cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py
delete mode 100644 cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py
diff --git a/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py b/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py
deleted file mode 100644
index dec751f89..000000000
--- a/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import warnings
-
-import pytest
-
-from cognee.modules.graph.utils import get_graph_from_model
-from cognee.tests.unit.interfaces.graph.util import (
- PERSON_NAMES,
- count_society,
- create_organization_recursive,
-)
-
-
-@pytest.mark.parametrize("recursive_depth", [1, 2, 3])
-def test_society_nodes_and_edges(recursive_depth):
- import sys
-
- if sys.version_info[0] == 3 and sys.version_info[1] >= 11:
- society = create_organization_recursive(
- "society", "Society", PERSON_NAMES, recursive_depth
- )
-
- n_organizations, n_persons = count_society(society)
- society_counts_total = n_organizations + n_persons
-
- nodes, edges = get_graph_from_model(society)
-
- assert (
- len(nodes) == society_counts_total
- ), f"{society_counts_total = } != {len(nodes) = }, not all DataPoint instances were found"
-
- assert len(edges) == (
- len(nodes) - 1
- ), f"{(len(nodes) - 1) = } != {len(edges) = }, there have to be n_nodes - 1 edges, as each node has exactly one parent node, except for the root node"
- else:
- warnings.warn(
- "The recursive pydantic data structure cannot be reconstructed from the graph because the 'inner' pydantic class is not defined. Hence this test is skipped. This problem is solved in Python 3.11"
- )
diff --git a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py
deleted file mode 100644
index dd5e19469..000000000
--- a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import warnings
-
-import pytest
-
-from cognee.modules.graph.utils import (
- get_graph_from_model,
- get_model_instance_from_graph,
-)
-from cognee.tests.unit.interfaces.graph.util import (
- PERSON_NAMES,
- create_organization_recursive,
- show_first_difference,
-)
-
-
-@pytest.mark.parametrize("recursive_depth", [1, 2, 3])
-def test_society_nodes_and_edges(recursive_depth):
- import sys
-
- if sys.version_info[0] == 3 and sys.version_info[1] >= 11:
- society = create_organization_recursive(
- "society", "Society", PERSON_NAMES, recursive_depth
- )
- nodes, edges = get_graph_from_model(society)
- parsed_society = get_model_instance_from_graph(nodes, edges, "society")
-
- assert str(society) == (str(parsed_society)), show_first_difference(
- str(society), str(parsed_society), "society", "parsed_society"
- )
- else:
- warnings.warn(
- "The recursive pydantic data structure cannot be reconstructed from the graph because the 'inner' pydantic class is not defined. Hence this test is skipped. This problem is solved in Python 3.11"
- )
diff --git a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py
deleted file mode 100644
index f1aa7736d..000000000
--- a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from cognee.modules.graph.utils import (
- get_graph_from_model,
- get_model_instance_from_graph,
-)
-from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth
-
-PARSED_PERSON_GROUND_TRUTH = {
- "id": "boris",
- "name": "Boris",
- "age": 30,
- "driving_license": {
- "issued_by": "PU Vrsac",
- "issued_on": "2025-11-06",
- "number": "1234567890",
- "expires_on": "2025-11-06",
- },
-}
-
-CAR_GROUND_TRUTH = {
- "id": "car1",
- "brand": "Toyota",
- "model": "Camry",
- "year": 2020,
- "color": "Blue",
-}
-
-
-def test_parsed_person(boris):
- nodes, edges = get_graph_from_model(boris)
- parsed_person = get_model_instance_from_graph(nodes, edges, "boris")
-
- run_test_against_ground_truth(
- "parsed_person", parsed_person, PARSED_PERSON_GROUND_TRUTH
- )
- run_test_against_ground_truth("car", parsed_person.owns_car[0], CAR_GROUND_TRUTH)
From 57f319fb32c1b271684358cd13706c41f0da33d8 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:44:49 +0100
Subject: [PATCH 20/23] fix: deletes dummy data of faulty tests
---
.../tests/unit/interfaces/graph/conftest.py | 68 --------
cognee/tests/unit/interfaces/graph/util.py | 150 ------------------
2 files changed, 218 deletions(-)
delete mode 100644 cognee/tests/unit/interfaces/graph/conftest.py
delete mode 100644 cognee/tests/unit/interfaces/graph/util.py
diff --git a/cognee/tests/unit/interfaces/graph/conftest.py b/cognee/tests/unit/interfaces/graph/conftest.py
deleted file mode 100644
index 45f977bd6..000000000
--- a/cognee/tests/unit/interfaces/graph/conftest.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from enum import Enum
-from typing import Optional
-
-import pytest
-
-from cognee.infrastructure.engine import DataPoint
-
-
-class CarTypeName(Enum):
- Pickup = "Pickup"
- Sedan = "Sedan"
- SUV = "SUV"
- Coupe = "Coupe"
- Convertible = "Convertible"
- Hatchback = "Hatchback"
- Wagon = "Wagon"
- Minivan = "Minivan"
- Van = "Van"
-
-
-class CarType(DataPoint):
- id: str
- name: CarTypeName
- _metadata: dict = dict(index_fields=["name"])
-
-
-class Car(DataPoint):
- id: str
- brand: str
- model: str
- year: int
- color: str
- is_type: CarType
-
-
-class Person(DataPoint):
- id: str
- name: str
- age: int
- owns_car: list[Car]
- driving_license: Optional[dict]
- _metadata: dict = dict(index_fields=["name"])
-
-
-@pytest.fixture(scope="function")
-def boris():
- boris = Person(
- id="boris",
- name="Boris",
- age=30,
- owns_car=[
- Car(
- id="car1",
- brand="Toyota",
- model="Camry",
- year=2020,
- color="Blue",
- is_type=CarType(id="sedan", name=CarTypeName.Sedan),
- )
- ],
- driving_license={
- "issued_by": "PU Vrsac",
- "issued_on": "2025-11-06",
- "number": "1234567890",
- "expires_on": "2025-11-06",
- },
- )
- return boris
diff --git a/cognee/tests/unit/interfaces/graph/util.py b/cognee/tests/unit/interfaces/graph/util.py
deleted file mode 100644
index a20bdb3e4..000000000
--- a/cognee/tests/unit/interfaces/graph/util.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import random
-import string
-from datetime import datetime, timezone
-from typing import Any, Dict, Optional
-
-from cognee.infrastructure.engine import DataPoint
-
-
-def run_test_against_ground_truth(
- test_target_item_name: str, test_target_item: Any, ground_truth_dict: Dict[str, Any]
-):
- """Validates test target item attributes against ground truth values.
-
- Args:
- test_target_item_name: Name of the item being tested (for error messages)
- test_target_item: Object whose attributes are being validated
- ground_truth_dict: Dictionary containing expected values
-
- Raises:
- AssertionError: If any attribute doesn't match ground truth or if update timestamp is too old
- """
- for key, ground_truth in ground_truth_dict.items():
- if isinstance(ground_truth, dict):
- for key2, ground_truth2 in ground_truth.items():
- assert (
- ground_truth2 == getattr(test_target_item, key)[key2]
- ), f"{test_target_item_name}/{key = }/{key2 = }: {ground_truth2 = } != {getattr(test_target_item, key)[key2] = }"
- elif isinstance(ground_truth, list):
- raise NotImplementedError("Currently not implemented for 'list'")
- else:
- assert ground_truth == getattr(
- test_target_item, key
- ), f"{test_target_item_name}/{key = }: {ground_truth = } != {getattr(test_target_item, key) = }"
- time_delta = datetime.now(timezone.utc) - getattr(test_target_item, "updated_at")
-
- assert time_delta.total_seconds() < 60, f"{ time_delta.total_seconds() = }"
-
-
-class Organization(DataPoint):
- id: str
- name: str
- members: Optional[list["SocietyPerson"]]
-
-
-class SocietyPerson(DataPoint):
- id: str
- name: str
- memberships: Optional[list[Organization]]
-
-
-SocietyPerson.model_rebuild()
-Organization.model_rebuild()
-
-
-ORGANIZATION_NAMES = [
- "ChessClub",
- "RowingClub",
- "TheatreTroupe",
- "PoliticalParty",
- "Charity",
- "FanClub",
- "FilmClub",
- "NeighborhoodGroup",
- "LocalCouncil",
- "Band",
-]
-PERSON_NAMES = ["Sarah", "Anna", "John", "Sam"]
-
-
-def create_society_person_recursive(id, name, organization_names, max_depth, depth=0):
- id_suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(10))
-
- if depth < max_depth:
- memberships = [
- create_organization_recursive(
- f"{org_name}-{depth}-{id_suffix}",
- org_name.lower(),
- PERSON_NAMES,
- max_depth,
- depth + 1,
- )
- for org_name in organization_names
- ]
- else:
- memberships = None
-
- return SocietyPerson(id=id, name=f"{name}{depth}", memberships=memberships)
-
-
-def create_organization_recursive(id, name, member_names, max_depth, depth=0):
- id_suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(10))
-
- if depth < max_depth:
- members = [
- create_society_person_recursive(
- f"{member_name}-{depth}-{id_suffix}",
- member_name.lower(),
- ORGANIZATION_NAMES,
- max_depth,
- depth + 1,
- )
- for member_name in member_names
- ]
- else:
- members = None
-
- return Organization(id=id, name=f"{name}{depth}", members=members)
-
-
-def count_society(obj):
- if isinstance(obj, SocietyPerson):
- if obj.memberships is not None:
- organization_counts, society_person_counts = zip(
- *[count_society(organization) for organization in obj.memberships]
- )
- organization_count = sum(organization_counts)
- society_person_count = sum(society_person_counts) + 1
- return (organization_count, society_person_count)
- else:
- return (0, 1)
- if isinstance(obj, Organization):
- if obj.members is not None:
- organization_counts, society_person_counts = zip(
- *[count_society(organization) for organization in obj.members]
- )
- organization_count = sum(organization_counts) + 1
- society_person_count = sum(society_person_counts)
- return (organization_count, society_person_count)
- else:
- return (1, 0)
- else:
- raise Exception("Not allowed")
-
-
-def show_first_difference(str1, str2, str1_name, str2_name, context=30):
- for i, (c1, c2) in enumerate(zip(str1, str2)):
- if c1 != c2:
- start = max(0, i - context)
- end1 = min(len(str1), i + context + 1)
- end2 = min(len(str2), i + context + 1)
- if i > 0:
- return f"identical: '{str1[start:i-1]}' | {str1_name}: '{str1[i-1:end1]}'... != {str2_name}: '{str2[i-1:end2]}'..."
- else:
- return f"{str1_name} and {str2_name} have no overlap in characters"
- if len(str1) > len(str2):
- return f"{str2_name} is identical up to the {i}th character, missing afterwards '{str1[i:i+context]}'..."
- if len(str2) > len(str1):
- return f"{str1_name} is identical up to the {i}th character, missing afterwards '{str2[i:i+context]}'..."
- else:
- return f"{str1_name} and {str2_name} are identical."
From 1d18dd2f18b9a4566281981c216c377eafac29d2 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 17:33:18 +0100
Subject: [PATCH 21/23] docs: Update README.md to have less clutter
Resolve clutter in README.md
docs
---
README.md | 70 ++++++++++++++++++-------------------------------------
1 file changed, 23 insertions(+), 47 deletions(-)
diff --git a/README.md b/README.md
index e57b498ab..dc43a65c0 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ If you have questions, join our Discord
## 📦 Installation
-You can install Cognee using either **pip** or **poetry**.
+You can install Cognee using either **pip** or **poetry**.
Support for various databases and vector stores is available through extras.
### With pip
@@ -29,73 +29,49 @@ Support for various databases and vector stores is available through extras.
pip install cognee
```
-### With pip with PostgreSQL support
-
-```bash
-pip install 'cognee[postgres]'
-```
-
-### With pip with Weaviate support
-
-```bash
-pip install 'cognee[weaviate]'
-```
-
-### With pip with Qdrant support
-
-```bash
-pip install 'cognee[qdrant]'
-```
-
-### With pip with Neo4j support
-
-```bash
-pip install 'cognee[neo4j]'
-```
-
-### With pip with Milvus support
-
-```bash
-pip install 'cognee[milvus]'
-```
-
### With poetry
```bash
poetry add cognee
```
-### With poetry with PostgreSQL support
+### With pip with specific database support
+To install Cognee with support for specific databases use the appropriate command below. Replace \ with the name of the database you need.
```bash
-poetry add cognee -E postgres
+pip install 'cognee[]'
```
-### With poetry with Weaviate support
+Replace \ with any of the following databases:
+- postgres
+- weaviate
+- qdrant
+- neo4j
+- milvus
+For example with postgres and neo4j support:
```bash
-poetry add cognee -E weaviate
+pip install 'cognee[postgres, neo4j]'
```
-### With poetry with Qdrant support
+### With poetry with specific database support
+To install Cognee with support for specific databases use the appropriate command below. Replace \ with the name of the database you need.
```bash
-poetry add cognee -E qdrant
+poetry add cognee -E
```
+Replace \ with any of the following databases:
+- postgres
+- weaviate
+- qdrant
+- neo4j
+- milvus
-### With poetry with Neo4j support
-
+For example with postgres and neo4j support:
```bash
-poetry add cognee -E neo4j
+poetry add cognee -E postgres -E neo4j
```
-### With poetry with Milvus support
-
-```bash
-poetry add cognee -E milvus
-```
-
-
## 💻 Basic Usage
### Setup
From 50e9b81aae61fa1218371315b276af9f40a18be5 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Tue, 3 Dec 2024 17:38:31 +0100
Subject: [PATCH 22/23] docs: Better wording for README.md
Better wording for part of README.md
docs
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index dc43a65c0..f0aa2a280 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ Replace \ with any of the following databases:
- neo4j
- milvus
-For example with postgres and neo4j support:
+Installing Cognee with PostgreSQL and Neo4j support example:
```bash
pip install 'cognee[postgres, neo4j]'
```
@@ -67,7 +67,7 @@ Replace \ with any of the following databases:
- neo4j
- milvus
-For example with postgres and neo4j support:
+Installing Cognee with PostgreSQL and Neo4j support example:
```bash
poetry add cognee -E postgres -E neo4j
```
From d49ab4c3b5da7cef73ea22e1f615fb14cd1eb884 Mon Sep 17 00:00:00 2001
From: Boris Arzentar
Date: Tue, 3 Dec 2024 23:48:12 +0100
Subject: [PATCH 23/23] feat: update code-graph notebook
---
.../relational/get_relational_engine.py | 4 +-
.../embeddings/LiteLLMEmbeddingEngine.py | 23 +-
evals/eval_swe_bench.py | 20 +-
notebooks/cognee_code_graph_demo.ipynb | 130 ++++---
poetry.lock | 332 +++++++++++++++++-
pyproject.toml | 2 +
6 files changed, 419 insertions(+), 92 deletions(-)
diff --git a/cognee/infrastructure/databases/relational/get_relational_engine.py b/cognee/infrastructure/databases/relational/get_relational_engine.py
index c0a66e28e..d035f5baf 100644
--- a/cognee/infrastructure/databases/relational/get_relational_engine.py
+++ b/cognee/infrastructure/databases/relational/get_relational_engine.py
@@ -1,9 +1,9 @@
-from functools import lru_cache
+# from functools import lru_cache
from .config import get_relational_config
from .create_relational_engine import create_relational_engine
-@lru_cache
+# @lru_cache
def get_relational_engine():
relational_config = get_relational_config()
diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
index edc8eb57f..6526e59c3 100644
--- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
@@ -1,4 +1,6 @@
+import asyncio
import logging
+import math
from typing import List, Optional
import litellm
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
@@ -36,11 +38,26 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
api_base = self.endpoint,
api_version = self.api_version
)
- except litellm.exceptions.BadRequestError as error:
+ return [data["embedding"] for data in response.data]
+
+ except litellm.exceptions.ContextWindowExceededError as error:
+ if isinstance(text, list):
+ parts = [text[0:math.ceil(len(text)/2)], text[math.ceil(len(text)/2):]]
+ parts_futures = [self.embed_text(part) for part in parts]
+ embeddings = await asyncio.gather(*parts_futures)
+
+ all_embeddings = []
+ for embeddings_part in embeddings:
+ all_embeddings.extend(embeddings_part)
+
+ return [data["embedding"] for data in all_embeddings]
+
+ logger.error("Context window exceeded for embedding text: %s", str(error))
+ raise error
+
+ except Exception as error:
logger.error("Error embedding text: %s", str(error))
raise error
- return [data["embedding"] for data in response.data]
-
def get_vector_size(self) -> int:
return self.dimensions
diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py
index 9cd679429..ee13c92e4 100644
--- a/evals/eval_swe_bench.py
+++ b/evals/eval_swe_bench.py
@@ -7,20 +7,19 @@ from pathlib import Path
from swebench.harness.utils import load_swebench_dataset
from swebench.inference.make_datasets.create_instance import PATCH_EXAMPLE
-import cognee
from cognee.api.v1.search import SearchType
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.modules.pipelines import Task, run_tasks
from cognee.modules.retrieval.brute_force_triplet_search import \
brute_force_triplet_search
-from cognee.shared.data_models import SummarizedContent
+# from cognee.shared.data_models import SummarizedContent
from cognee.shared.utils import render_graph
from cognee.tasks.repo_processor import (enrich_dependency_graph,
expand_dependency_graph,
get_repo_file_dependencies)
from cognee.tasks.storage import add_data_points
-from cognee.tasks.summarization import summarize_code
+# from cognee.tasks.summarization import summarize_code
from evals.eval_utils import download_github_repo, retrieved_edges_to_string
@@ -43,8 +42,21 @@ def check_install_package(package_name):
async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS):
+ import os
+ import pathlib
+ import cognee
+ from cognee.infrastructure.databases.relational import create_db_and_tables
+
+ file_path = Path(__file__).parent
+ data_directory_path = str(pathlib.Path(os.path.join(file_path, ".data_storage/code_graph")).resolve())
+ cognee.config.data_root_directory(data_directory_path)
+ cognee_directory_path = str(pathlib.Path(os.path.join(file_path, ".cognee_system/code_graph")).resolve())
+ cognee.config.system_root_directory(cognee_directory_path)
+
await cognee.prune.prune_data()
- await cognee.prune.prune_system()
+ await cognee.prune.prune_system(metadata = True)
+
+ await create_db_and_tables()
# repo_path = download_github_repo(instance, '../RAW_GIT_REPOS')
diff --git a/notebooks/cognee_code_graph_demo.ipynb b/notebooks/cognee_code_graph_demo.ipynb
index 5e21e9dad..f5735dcb8 100644
--- a/notebooks/cognee_code_graph_demo.ipynb
+++ b/notebooks/cognee_code_graph_demo.ipynb
@@ -17,60 +17,63 @@
"metadata": {},
"outputs": [],
"source": [
- "from cognee.modules.users.methods import get_default_user\n",
+ "import os\n",
+ "import pathlib\n",
+ "import cognee\n",
+ "from cognee.infrastructure.databases.relational import create_db_and_tables\n",
"\n",
- "from cognee.modules.data.methods import get_datasets\n",
- "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
- "from cognee.modules.data.models import Data\n",
+ "notebook_path = os.path.abspath(\"\")\n",
+ "data_directory_path = str(pathlib.Path(os.path.join(notebook_path, \".data_storage/code_graph\")).resolve())\n",
+ "cognee.config.data_root_directory(data_directory_path)\n",
+ "cognee_directory_path = str(pathlib.Path(os.path.join(notebook_path, \".cognee_system/code_graph\")).resolve())\n",
+ "cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
- "from cognee.modules.pipelines.tasks.Task import Task\n",
- "from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
- "from cognee.tasks.graph import extract_graph_from_code\n",
+ "await cognee.prune.prune_data()\n",
+ "await cognee.prune.prune_system(metadata = True)\n",
+ "\n",
+ "await create_db_and_tables()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from os import path\n",
+ "from pathlib import Path\n",
+ "from cognee.infrastructure.files.storage import LocalStorage\n",
+ "import git\n",
+ "\n",
+ "notebook_path = path.abspath(\"\")\n",
+ "repo_clone_location = path.join(notebook_path, \"data/graphrag\")\n",
+ "\n",
+ "LocalStorage.remove_all(repo_clone_location)\n",
+ "\n",
+ "git.Repo.clone_from(\n",
+ " \"git@github.com:microsoft/graphrag.git\",\n",
+ " Path(repo_clone_location),\n",
+ " branch = \"main\",\n",
+ " single_branch = True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from cognee.tasks.repo_processor import enrich_dependency_graph, expand_dependency_graph, get_repo_file_dependencies\n",
"from cognee.tasks.storage import add_data_points\n",
- "from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
+ "from cognee.modules.pipelines.tasks.Task import Task\n",
"\n",
- "from cognee.modules.pipelines import run_tasks\n",
- "\n",
- "from cognee.shared.utils import render_graph\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "user = await get_default_user()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "existing_datasets = await get_datasets(user.id)\n",
- "\n",
- "datasets = {}\n",
- "for dataset in existing_datasets:\n",
- " dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
- " data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
- " datasets[dataset_name] = data_documents\n",
- "print(datasets.keys())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
"tasks = [\n",
- " Task(classify_documents),\n",
- " Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
- " Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
- " Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
- " Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
+ " Task(get_repo_file_dependencies),\n",
+ " Task(add_data_points, task_config = { \"batch_size\": 50 }),\n",
+ " Task(enrich_dependency_graph, task_config = { \"batch_size\": 50 }),\n",
+ " Task(expand_dependency_graph, task_config = { \"batch_size\": 50 }),\n",
+ " Task(add_data_points, task_config = { \"batch_size\": 50 }),\n",
"]"
]
},
@@ -80,21 +83,15 @@
"metadata": {},
"outputs": [],
"source": [
- "async def run_codegraph_pipeline(tasks, data_documents):\n",
- " pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
- " results = []\n",
- " async for result in pipeline:\n",
- " results.append(result)\n",
- " return(results)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
+ "from cognee.modules.pipelines import run_tasks\n",
+ "\n",
+ "notebook_path = os.path.abspath(\"\")\n",
+ "repo_clone_location = os.path.join(notebook_path, \"data/graphrag\")\n",
+ "\n",
+ "pipeline = run_tasks(tasks, repo_clone_location, \"code_graph_pipeline\")\n",
+ "\n",
+ "async for result in pipeline:\n",
+ " print(result)"
]
},
{
@@ -103,6 +100,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "from cognee.shared.utils import render_graph\n",
"await render_graph(None, include_nodes = True, include_labels = True)"
]
},
@@ -116,7 +114,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "cognee",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -130,7 +128,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.10"
+ "version": "3.11.8"
}
},
"nbformat": 4,
diff --git a/poetry.lock b/poetry.lock
index 9b309de51..0f3d414bd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -6424,6 +6424,138 @@ botocore = ">=1.33.2,<2.0a.0"
[package.extras]
crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
+[[package]]
+name = "safetensors"
+version = "0.4.5"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7"},
+ {file = "safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5"},
+ {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b"},
+ {file = "safetensors-0.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6"},
+ {file = "safetensors-0.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163"},
+ {file = "safetensors-0.4.5-cp310-none-win32.whl", hash = "sha256:7389129c03fadd1ccc37fd1ebbc773f2b031483b04700923c3511d2a939252cc"},
+ {file = "safetensors-0.4.5-cp310-none-win_amd64.whl", hash = "sha256:e98ef5524f8b6620c8cdef97220c0b6a5c1cef69852fcd2f174bb96c2bb316b1"},
+ {file = "safetensors-0.4.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c"},
+ {file = "safetensors-0.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090"},
+ {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943"},
+ {file = "safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0"},
+ {file = "safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f"},
+ {file = "safetensors-0.4.5-cp311-none-win32.whl", hash = "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92"},
+ {file = "safetensors-0.4.5-cp311-none-win_amd64.whl", hash = "sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04"},
+ {file = "safetensors-0.4.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e"},
+ {file = "safetensors-0.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c"},
+ {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1"},
+ {file = "safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4"},
+ {file = "safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646"},
+ {file = "safetensors-0.4.5-cp312-none-win32.whl", hash = "sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6"},
+ {file = "safetensors-0.4.5-cp312-none-win_amd64.whl", hash = "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532"},
+ {file = "safetensors-0.4.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e"},
+ {file = "safetensors-0.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3"},
+ {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35"},
+ {file = "safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523"},
+ {file = "safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142"},
+ {file = "safetensors-0.4.5-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:77d9b228da8374c7262046a36c1f656ba32a93df6cc51cd4453af932011e77f1"},
+ {file = "safetensors-0.4.5-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:500cac01d50b301ab7bb192353317035011c5ceeef0fca652f9f43c000bb7f8d"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75331c0c746f03158ded32465b7d0b0e24c5a22121743662a2393439c43a45cf"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670e95fe34e0d591d0529e5e59fd9d3d72bc77b1444fcaa14dccda4f36b5a38b"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:098923e2574ff237c517d6e840acada8e5b311cb1fa226019105ed82e9c3b62f"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ca0902d2648775089fa6a0c8fc9e6390c5f8ee576517d33f9261656f851e3f"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f0032bedc869c56f8d26259fe39cd21c5199cd57f2228d817a0e23e8370af25"},
+ {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4b15f51b4f8f2a512341d9ce3475cacc19c5fdfc5db1f0e19449e75f95c7dc8"},
+ {file = "safetensors-0.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f6594d130d0ad933d885c6a7b75c5183cb0e8450f799b80a39eae2b8508955eb"},
+ {file = "safetensors-0.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:60c828a27e852ded2c85fc0f87bf1ec20e464c5cd4d56ff0e0711855cc2e17f8"},
+ {file = "safetensors-0.4.5-cp37-none-win32.whl", hash = "sha256:6d3de65718b86c3eeaa8b73a9c3d123f9307a96bbd7be9698e21e76a56443af5"},
+ {file = "safetensors-0.4.5-cp37-none-win_amd64.whl", hash = "sha256:5a2d68a523a4cefd791156a4174189a4114cf0bf9c50ceb89f261600f3b2b81a"},
+ {file = "safetensors-0.4.5-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:e7a97058f96340850da0601a3309f3d29d6191b0702b2da201e54c6e3e44ccf0"},
+ {file = "safetensors-0.4.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:63bfd425e25f5c733f572e2246e08a1c38bd6f2e027d3f7c87e2e43f228d1345"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3664ac565d0e809b0b929dae7ccd74e4d3273cd0c6d1220c6430035befb678e"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:313514b0b9b73ff4ddfb4edd71860696dbe3c1c9dc4d5cc13dbd74da283d2cbf"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31fa33ee326f750a2f2134a6174773c281d9a266ccd000bd4686d8021f1f3dac"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09566792588d77b68abe53754c9f1308fadd35c9f87be939e22c623eaacbed6b"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309aaec9b66cbf07ad3a2e5cb8a03205663324fea024ba391594423d0f00d9fe"},
+ {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:53946c5813b8f9e26103c5efff4a931cc45d874f45229edd68557ffb35ffb9f8"},
+ {file = "safetensors-0.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:868f9df9e99ad1e7f38c52194063a982bc88fedc7d05096f4f8160403aaf4bd6"},
+ {file = "safetensors-0.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9cc9449bd0b0bc538bd5e268221f0c5590bc5c14c1934a6ae359d44410dc68c4"},
+ {file = "safetensors-0.4.5-cp38-none-win32.whl", hash = "sha256:83c4f13a9e687335c3928f615cd63a37e3f8ef072a3f2a0599fa09f863fb06a2"},
+ {file = "safetensors-0.4.5-cp38-none-win_amd64.whl", hash = "sha256:b98d40a2ffa560653f6274e15b27b3544e8e3713a44627ce268f419f35c49478"},
+ {file = "safetensors-0.4.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cf727bb1281d66699bef5683b04d98c894a2803442c490a8d45cd365abfbdeb2"},
+ {file = "safetensors-0.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:96f1d038c827cdc552d97e71f522e1049fef0542be575421f7684756a748e457"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:139fbee92570ecea774e6344fee908907db79646d00b12c535f66bc78bd5ea2c"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c36302c1c69eebb383775a89645a32b9d266878fab619819ce660309d6176c9b"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d641f5b8149ea98deb5ffcf604d764aad1de38a8285f86771ce1abf8e74c4891"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4db6a61d968de73722b858038c616a1bebd4a86abe2688e46ca0cc2d17558f2"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b75a616e02f21b6f1d5785b20cecbab5e2bd3f6358a90e8925b813d557666ec1"},
+ {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:788ee7d04cc0e0e7f944c52ff05f52a4415b312f5efd2ee66389fb7685ee030c"},
+ {file = "safetensors-0.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:87bc42bd04fd9ca31396d3ca0433db0be1411b6b53ac5a32b7845a85d01ffc2e"},
+ {file = "safetensors-0.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4037676c86365a721a8c9510323a51861d703b399b78a6b4486a54a65a975fca"},
+ {file = "safetensors-0.4.5-cp39-none-win32.whl", hash = "sha256:1500418454529d0ed5c1564bda376c4ddff43f30fce9517d9bee7bcce5a8ef50"},
+ {file = "safetensors-0.4.5-cp39-none-win_amd64.whl", hash = "sha256:9d1a94b9d793ed8fe35ab6d5cea28d540a46559bafc6aae98f30ee0867000cab"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d42ffd4c2259f31832cb17ff866c111684c87bd930892a1ba53fed28370c918c"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab"},
+ {file = "safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c7db3006a4915151ce1913652e907cdede299b974641a83fbc092102ac41b644"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f68bf99ea970960a237f416ea394e266e0361895753df06e3e06e6ea7907d98b"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8158938cf3324172df024da511839d373c40fbfaa83e9abf467174b2910d7b4c"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:540ce6c4bf6b58cb0fd93fa5f143bc0ee341c93bb4f9287ccd92cf898cc1b0dd"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bfeaa1a699c6b9ed514bd15e6a91e74738b71125a9292159e3d6b7f0a53d2cde"},
+ {file = "safetensors-0.4.5-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:01c8f00da537af711979e1b42a69a8ec9e1d7112f208e0e9b8a35d2c381085ef"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a0dd565f83b30f2ca79b5d35748d0d99dd4b3454f80e03dfb41f0038e3bdf180"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:023b6e5facda76989f4cba95a861b7e656b87e225f61811065d5c501f78cdb3f"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9633b663393d5796f0b60249549371e392b75a0b955c07e9c6f8708a87fc841f"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78dd8adfb48716233c45f676d6e48534d34b4bceb50162c13d1f0bdf6f78590a"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e8deb16c4321d61ae72533b8451ec4a9af8656d1c61ff81aa49f966406e4b68"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:52452fa5999dc50c4decaf0c53aa28371f7f1e0fe5c2dd9129059fbe1e1599c7"},
+ {file = "safetensors-0.4.5-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d5f23198821e227cfc52d50fa989813513db381255c6d100927b012f0cfec63d"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f4beb84b6073b1247a773141a6331117e35d07134b3bb0383003f39971d414bb"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:68814d599d25ed2fdd045ed54d370d1d03cf35e02dce56de44c651f828fb9b7b"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0b6453c54c57c1781292c46593f8a37254b8b99004c68d6c3ce229688931a22"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adaa9c6dead67e2dd90d634f89131e43162012479d86e25618e821a03d1eb1dc"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73e7d408e9012cd17511b382b43547850969c7979efc2bc353f317abaf23c84c"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:775409ce0fcc58b10773fdb4221ed1eb007de10fe7adbdf8f5e8a56096b6f0bc"},
+ {file = "safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:834001bed193e4440c4a3950a31059523ee5090605c907c66808664c932b549c"},
+ {file = "safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310"},
+]
+
+[package.extras]
+all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"]
+dev = ["safetensors[all]"]
+jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"]
+mlx = ["mlx (>=0.0.9)"]
+numpy = ["numpy (>=1.21.6)"]
+paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"]
+pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"]
+quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"]
+tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"]
+testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"]
+torch = ["safetensors[numpy]", "torch (>=1.10)"]
+
[[package]]
name = "scikit-learn"
version = "1.5.2"
@@ -7055,26 +7187,123 @@ test = ["pytest", "ruff"]
[[package]]
name = "tokenizers"
-version = "0.21.0"
+version = "0.20.3"
description = ""
optional = false
python-versions = ">=3.7"
files = [
- {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"},
- {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273"},
- {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04"},
- {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e"},
- {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b"},
- {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74"},
- {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff"},
- {file = "tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a"},
- {file = "tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c"},
- {file = "tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4"},
+ {file = "tokenizers-0.20.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:31ccab28dbb1a9fe539787210b0026e22debeab1662970f61c2d921f7557f7e4"},
+ {file = "tokenizers-0.20.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6361191f762bda98c773da418cf511cbaa0cb8d0a1196f16f8c0119bde68ff8"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f128d5da1202b78fa0a10d8d938610472487da01b57098d48f7e944384362514"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79c4121a2e9433ad7ef0769b9ca1f7dd7fa4c0cd501763d0a030afcbc6384481"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7850fde24197fe5cd6556e2fdba53a6d3bae67c531ea33a3d7c420b90904141"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b357970c095dc134978a68c67d845a1e3803ab7c4fbb39195bde914e7e13cf8b"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a333d878c4970b72d6c07848b90c05f6b045cf9273fc2bc04a27211721ad6118"},
+ {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fd9fee817f655a8f50049f685e224828abfadd436b8ff67979fc1d054b435f1"},
+ {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e7816808b402129393a435ea2a509679b41246175d6e5e9f25b8692bfaa272b"},
+ {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba96367db9d8a730d3a1d5996b4b7babb846c3994b8ef14008cd8660f55db59d"},
+ {file = "tokenizers-0.20.3-cp310-none-win32.whl", hash = "sha256:ee31ba9d7df6a98619426283e80c6359f167e2e9882d9ce1b0254937dbd32f3f"},
+ {file = "tokenizers-0.20.3-cp310-none-win_amd64.whl", hash = "sha256:a845c08fdad554fe0871d1255df85772f91236e5fd6b9287ef8b64f5807dbd0c"},
+ {file = "tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90"},
+ {file = "tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da"},
+ {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907"},
+ {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a"},
+ {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c"},
+ {file = "tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442"},
+ {file = "tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0"},
+ {file = "tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f"},
+ {file = "tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f"},
+ {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad"},
+ {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5"},
+ {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2"},
+ {file = "tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c"},
+ {file = "tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2"},
+ {file = "tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84"},
+ {file = "tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1"},
+ {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0"},
+ {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797"},
+ {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01"},
+ {file = "tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13"},
+ {file = "tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:9adda1ff5fb9dcdf899ceca672a4e2ce9e797adb512a6467305ca3d8bfcfbdd0"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:6dde2cae6004ba7a3badff4a11911cae03ebf23e97eebfc0e71fef2530e5074f"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4a7fd678b35614fca708579eb95b7587a5e8a6d328171bd2488fd9f27d82be4"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b80e3c7283a01a356bd2210f53d1a4a5d32b269c2024389ed0173137708d50e"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a8cc0e8176b762973758a77f0d9c4467d310e33165fb74173418ca3734944da4"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5634b2e2f5f3d2b4439d2d74066e22eb4b1f04f3fea05cb2a3c12d89b5a3bcd"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4ba635165bc1ea46f2da8e5d80b5f70f6ec42161e38d96dbef33bb39df73964"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18e4c7c64172e7789bd8b07aa3087ea87c4c4de7e90937a2aa036b5d92332536"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1f74909ef7675c26d4095a817ec3393d67f3158ca4836c233212e5613ef640c4"},
+ {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0e9b81321a1e05b16487d312b4264984513f8b4a7556229cafac6e88c2036b09"},
+ {file = "tokenizers-0.20.3-cp37-none-win32.whl", hash = "sha256:ab48184cd58b4a03022a2ec75b54c9f600ffea9a733612c02325ed636f353729"},
+ {file = "tokenizers-0.20.3-cp37-none-win_amd64.whl", hash = "sha256:60ac483cebee1c12c71878523e768df02fa17e4c54412966cb3ac862c91b36c1"},
+ {file = "tokenizers-0.20.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:3229ef103c89583d10b9378afa5d601b91e6337530a0988e17ca8d635329a996"},
+ {file = "tokenizers-0.20.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac52cc24bad3de865c7e65b1c4e7b70d00938a8ae09a92a453b8f676e714ad5"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04627b7b502fa6a2a005e1bd446fa4247d89abcb1afaa1b81eb90e21aba9a60f"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c27ceb887f0e81a3c377eb4605dca7a95a81262761c0fba308d627b2abb98f2b"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65ab780194da4e1fcf5670523a2f377c4838ebf5249efe41fa1eddd2a84fb49d"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98d343134f47159e81f7f242264b0eb222e6b802f37173c8d7d7b64d5c9d1388"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2475bb004ab2009d29aff13b5047bfdb3d4b474f0aa9d4faa13a7f34dbbbb43"},
+ {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b6583a65c01db1197c1eb36857ceba8ec329d53afadd268b42a6b04f4965724"},
+ {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62d00ba208358c037eeab7bfc00a905adc67b2d31b68ab40ed09d75881e114ea"},
+ {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0fc7a39e5bedc817bda395a798dfe2d9c5f7c71153c90d381b5135a0328d9520"},
+ {file = "tokenizers-0.20.3-cp38-none-win32.whl", hash = "sha256:84d40ee0f8550d64d3ea92dd7d24a8557a9172165bdb986c9fb2503b4fe4e3b6"},
+ {file = "tokenizers-0.20.3-cp38-none-win_amd64.whl", hash = "sha256:205a45246ed7f1718cf3785cff88450ba603352412aaf220ace026384aa3f1c0"},
+ {file = "tokenizers-0.20.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:93e37f0269a11dc3b1a953f1fca9707f0929ebf8b4063c591c71a0664219988e"},
+ {file = "tokenizers-0.20.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f4cb0c614b0135e781de96c2af87e73da0389ac1458e2a97562ed26e29490d8d"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7eb2fb1c432f5746b22f8a7f09fc18c4156cb0031c77f53cb19379d82d43297a"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfa8d029bb156181b006643309d6b673615a24e4ed24cf03aa191d599b996f51"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f90549622de3bf476ad9f1dd6f3f952ec3ed6ab8615ae88ef060d0c5bfad55d"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1d469c74eebf5c43fd61cd9b030e271d17198edd7bd45392e03a3c091d7d6d4"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bee8f53b2594749f4460d53253bae55d718f04e9b633efa0f5df8938bd98e4f0"},
+ {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:938441babf3e5720e4459e306ef2809fb267680df9d1ff2873458b22aef60248"},
+ {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7310ab23d7b0caebecc0e8be11a1146f320f5f07284000f6ea54793e83de1b75"},
+ {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:16121eb030a2b13094cfec936b0c12e8b4063c5f839591ea7d0212336d8f9921"},
+ {file = "tokenizers-0.20.3-cp39-none-win32.whl", hash = "sha256:401cc21ef642ee235985d747f65e18f639464d377c70836c9003df208d582064"},
+ {file = "tokenizers-0.20.3-cp39-none-win_amd64.whl", hash = "sha256:7498f3ea7746133335a6adb67a77cf77227a8b82c8483f644a2e5f86fea42b8d"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e919f2e3e68bb51dc31de4fcbbeff3bdf9c1cad489044c75e2b982a91059bd3c"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b8e9608f2773996cc272156e305bd79066163a66b0390fe21750aff62df1ac07"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39270a7050deaf50f7caff4c532c01b3c48f6608d42b3eacdebdc6795478c8df"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e005466632b1c5d2d2120f6de8aa768cc9d36cd1ab7d51d0c27a114c91a1e6ee"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07962340b36189b6c8feda552ea1bfeee6cf067ff922a1d7760662c2ee229e5"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:55046ad3dd5f2b3c67501fcc8c9cbe3e901d8355f08a3b745e9b57894855f85b"},
+ {file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:efcf0eb939988b627558aaf2b9dc3e56d759cad2e0cfa04fcab378e4b48fc4fd"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f3558a7ae6a6d38a77dfce12172a1e2e1bf3e8871e744a1861cd7591ea9ebe24"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d53029fe44bc70c3ff14ef512460a0cf583495a0f8e2f4b70e26eb9438e38a9"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a2a56397b2bec5a629b516b23f0f8a3e4f978c7488d4a299980f8375954b85"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e5bfaae740ef9ece000f8a07e78ac0e2b085c5ce9648f8593ddf0243c9f76d"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fbaf3ea28fedfb2283da60e710aff25492e795a7397cad8a50f1e079b65a5a70"},
+ {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c47c037116310dc976eb96b008e41b9cfaba002ed8005848d4d632ee0b7ba9ae"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c31751f0721f58f5e19bb27c1acc259aeff860d8629c4e1a900b26a1979ada8e"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:c697cbd3be7a79ea250ea5f380d6f12e534c543cfb137d5c734966b3ee4f34cc"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b48971b88ef9130bf35b41b35fd857c3c4dae4a9cd7990ebc7fc03e59cc92438"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e615de179bbe060ab33773f0d98a8a8572b5883dd7dac66c1de8c056c7e748c"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da1ec842035ed9999c62e45fbe0ff14b7e8a7e02bb97688cc6313cf65e5cd755"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:6ee4954c1dd23aadc27958dad759006e71659d497dcb0ef0c7c87ea992c16ebd"},
+ {file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3eda46ca402751ec82553a321bf35a617b76bbed7586e768c02ccacbdda94d6d"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:de082392a85eb0055cc055c535bff2f0cc15d7a000bdc36fbf601a0f3cf8507a"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c3db46cc0647bfd88263afdb739b92017a02a87ee30945cb3e86c7e25c7c9917"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a292392f24ab9abac5cfa8197e5a6208f2e43723420217e1ceba0b4ec77816ac"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dcd91f4e60f62b20d83a87a84fe062035a1e3ff49a8c2bbdeb2d441c8e311f4"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:900991a2b8ee35961b1095db7e265342e0e42a84c1a594823d5ee9f8fb791958"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5a8d8261ca2133d4f98aa9627c748189502b3787537ba3d7e2beb4f7cfc5d627"},
+ {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c4fd4d71e6deb6ddf99d8d0eab87d1d16f635898906e631914a9bae8ae9f2cfb"},
+ {file = "tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539"},
]
[package.dependencies]
@@ -7193,6 +7422,75 @@ files = [
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
+[[package]]
+name = "transformers"
+version = "4.46.3"
+description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+ {file = "transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef"},
+ {file = "transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc"},
+]
+
+[package.dependencies]
+filelock = "*"
+huggingface-hub = ">=0.23.2,<1.0"
+numpy = ">=1.17"
+packaging = ">=20.0"
+pyyaml = ">=5.1"
+regex = "!=2019.12.17"
+requests = "*"
+safetensors = ">=0.4.1"
+tokenizers = ">=0.20,<0.21"
+tqdm = ">=4.27"
+
+[package.extras]
+accelerate = ["accelerate (>=0.26.0)"]
+agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
+all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"]
+audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+benchmark = ["optimum-benchmark (>=0.3.0)"]
+codecarbon = ["codecarbon (==1.2.0)"]
+deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
+deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.20,<0.21)", "urllib3 (<2.0.0)"]
+dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
+flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+ftfy = ["ftfy"]
+integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"]
+ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
+modelcreation = ["cookiecutter (==1.7.3)"]
+natten = ["natten (>=0.14.6,<0.15.0)"]
+onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
+onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
+optuna = ["optuna"]
+quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "libcst", "rich", "ruff (==0.5.1)", "urllib3 (<2.0.0)"]
+ray = ["ray[tune] (>=2.7.0)"]
+retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
+ruff = ["ruff (==0.5.1)"]
+sagemaker = ["sagemaker (>=2.31.0)"]
+sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
+serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
+sigopt = ["sigopt"]
+sklearn = ["scikit-learn"]
+speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
+testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
+tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"]
+tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+tiktoken = ["blobfile", "tiktoken"]
+timm = ["timm (<=0.9.16)"]
+tokenizers = ["tokenizers (>=0.20,<0.21)"]
+torch = ["accelerate (>=0.26.0)", "torch"]
+torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
+torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
+torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.20,<0.21)", "torch", "tqdm (>=4.27)"]
+video = ["av (==9.2.0)"]
+vision = ["Pillow (>=10.0.1,<=15.0)"]
+
[[package]]
name = "tweepy"
version = "4.14.0"
@@ -7840,4 +8138,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
-content-hash = "e2360f4be222743bb83b1e7316185c5f62bd73c0baaab3eee984e1c84f1cea65"
+content-hash = "54118c733c3852c7685873a6a75789544475df45117488fa9c7071fd5bde1e8b"
diff --git a/pyproject.toml b/pyproject.toml
index 46d0a89a6..33a9ebf55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,7 @@ pgvector = "^0.3.5"
psycopg2 = {version = "^2.9.10", optional = true}
llama-index-core = {version = "^0.11.22", optional = true}
deepeval = {version = "^2.0.1", optional = true}
+transformers = "^4.46.3"
[tool.poetry.extras]
filesystem = ["s3fs", "botocore"]
@@ -96,6 +97,7 @@ debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
tweepy = "4.14.0"
+gitpython = "^3.1.43"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.42"