From 491dc4967925327d57f5d1d230072df4c46c016e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 14:00:17 +0100 Subject: [PATCH 01/20] refactor: Change proxy url to be to custom domain Added custom domain for proxy use Refactor COG-741 --- cognee/shared/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index a1792a2ed..315e234f1 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -17,7 +17,7 @@ from uuid import uuid4 import pathlib # Analytics Proxy Url, currently hosted by Vercel -vercel_url = "https://proxyanalytics.vercel.app" +proxy_url = "https://test.prometh.ai" def get_anonymous_id(): """Creates or reads a anonymous user id""" @@ -57,7 +57,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}): }, } - response = requests.post(vercel_url, json=payload) + response = requests.post(proxy_url, json=payload) if response.status_code != 200: print(f"Error sending telemetry through proxy: {response.status_code}") From 10dc6b152474eb5623145670857bc62f21922aec Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 14:38:41 +0100 Subject: [PATCH 02/20] chore: Add optional dependencies Change intended optional dependencies to really be optional Chore COG-595 --- poetry.lock | 32 ++++++++++++++++---------------- pyproject.toml | 10 +++++----- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 96f9aec27..08098caad 100644 --- a/poetry.lock +++ b/poetry.lock @@ -406,7 +406,7 @@ files = [ name = "asyncpg" version = "0.30.0" description = "An asyncio PostgreSQL driver" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"}, @@ -491,7 +491,7 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] name = "authlib" version = "1.3.2" 
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "Authlib-1.3.2-py2.py3-none-any.whl", hash = "sha256:ede026a95e9f5cdc2d4364a52103f5405e75aa156357e831ef2bfd0bc5094dfc"}, @@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5" name = "grpcio" version = "1.67.1" description = "HTTP/2-based RPC framework" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, @@ -2108,7 +2108,7 @@ protobuf = ["grpcio-tools (>=1.67.1)"] name = "grpcio-health-checking" version = "1.67.1" description = "Standard Health Checking Service for gRPC" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "grpcio_health_checking-1.67.1-py3-none-any.whl", hash = "sha256:93753da5062152660aef2286c9b261e07dd87124a65e4dc9fbd47d1ce966b39d"}, @@ -2123,7 +2123,7 @@ protobuf = ">=5.26.1,<6.0dev" name = "grpcio-tools" version = "1.67.1" description = "Protobuf code generator for gRPC" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"}, @@ -2223,7 +2223,7 @@ files = [ name = "h2" version = "4.1.0" description = "HTTP/2 State-Machine based protocol implementation" -optional = false +optional = true python-versions = ">=3.6.1" files = [ {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, @@ -2254,7 +2254,7 @@ test = ["eth-utils (>=2.0.0)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0 name = "hpack" version = "4.0.0" description = "Pure-Python HPACK header compression" -optional = false +optional = true python-versions = ">=3.6.1" files = [ {file = 
"hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, @@ -2369,7 +2369,7 @@ tests = ["freezegun", "pytest", "pytest-cov"] name = "hyperframe" version = "6.0.1" description = "HTTP/2 framing layer for Python" -optional = false +optional = true python-versions = ">=3.6.1" files = [ {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, @@ -4067,7 +4067,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "neo4j" version = "5.26.0" description = "Neo4j Bolt driver for Python" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "neo4j-5.26.0-py3-none-any.whl", hash = "sha256:511a6a9468ca89b521bf686f885a2070acc462b1d09821d43710bd477acdf11e"}, @@ -4613,7 +4613,7 @@ ptyprocess = ">=0.5" name = "pgvector" version = "0.3.6" description = "pgvector support for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pgvector-0.3.6-py3-none-any.whl", hash = "sha256:f6c269b3c110ccb7496bac87202148ed18f34b390a0189c783e351062400a75a"}, @@ -4761,7 +4761,7 @@ files = [ name = "portalocker" version = "2.10.1" description = "Wraps the portalocker recipe for easy usage" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"}, @@ -4938,7 +4938,7 @@ files = [ name = "protobuf" version = "5.28.3" description = "" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"}, @@ -5739,7 +5739,7 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} name = "qdrant-client" version = "1.12.1" description = "Client library for the Qdrant vector search engine" -optional = false +optional = true 
python-versions = ">=3.8" files = [ {file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"}, @@ -7144,7 +7144,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "validators" version = "0.33.0" description = "Python Data Validation for Humans™" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "validators-0.33.0-py3-none-any.whl", hash = "sha256:134b586a98894f8139865953899fc2daeb3d0c35569552c5518f089ae43ed075"}, @@ -7211,7 +7211,7 @@ files = [ name = "weaviate-client" version = "4.6.7" description = "A python native Weaviate client" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "weaviate_client-4.6.7-py3-none-any.whl", hash = "sha256:8793de35264cab33a84fe8cb8c422a257fe4d8334657aaddd8ead853da3fb34a"}, @@ -7641,4 +7641,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "474ae44ef721bf9b2d34d1cd139cddf42542ef9167895960784b6e88214dd1e6" +content-hash = "bf097b51f6147b82c63f67c34885981c2166b50755b0f3e690ab1c1e55fa56ee" diff --git a/pyproject.toml b/pyproject.toml index 92b70db63..b6fc10815 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,13 +43,13 @@ filetype = "^1.2.0" nltk = "^3.8.1" dlt = {extras = ["sqlalchemy"], version = "^1.3.0"} aiofiles = "^23.2.1" -qdrant-client = "^1.9.0" +qdrant-client = {version = "^1.9.0", optional = true} graphistry = "^0.33.5" tenacity = "^8.4.1" -weaviate-client = "4.6.7" +weaviate-client = {version = "4.6.7", optional = true} scikit-learn = "^1.5.0" pypdf = "^4.1.0" -neo4j = "^5.20.0" +neo4j = {version = "^5.20.0", optional = true} jinja2 = "^3.1.3" matplotlib = "^3.8.3" tiktoken = "0.7.0" @@ -66,8 +66,8 @@ anthropic = "^0.26.1" sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"} fastapi-users = {version = "*", extras = ["sqlalchemy"]} alembic = "^1.13.3" -asyncpg = "0.30.0" -pgvector 
= "^0.3.5" +asyncpg = {version = "0.30.0", optional = true} +pgvector = {version = "^0.3.5", optional = true} psycopg2 = {version = "^2.9.10", optional = true} llama-index-core = {version = "^0.11.22", optional = true} From f41228aa51b0c0a254a0fcc0465a115b6e2fe538 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 14:52:24 +0100 Subject: [PATCH 03/20] fix: Resolve import of optional package Resolve issue with forced import of optional pgvector package. Fix COG-595 --- .../databases/vector/pgvector/PGVectorAdapter.py | 4 +++- .../databases/vector/pgvector/create_db_and_tables.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index fd0fd493c..74a32511d 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -1,6 +1,5 @@ import asyncio from uuid import UUID -from pgvector.sqlalchemy import Vector from typing import List, Optional, get_type_hints from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy import JSON, Column, Table, select, delete @@ -68,6 +67,8 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): vector_size = self.embedding_engine.get_vector_size() if not await self.has_collection(collection_name): + + from pgvector.sqlalchemy import Vector class PGVectorDataPoint(Base): __tablename__ = collection_name __table_args__ = {"extend_existing": True} @@ -105,6 +106,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): vector_size = self.embedding_engine.get_vector_size() + from pgvector.sqlalchemy import Vector class PGVectorDataPoint(Base): __tablename__ = collection_name __table_args__ = {"extend_existing": True} diff --git a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py 
b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py index f40299939..1900cfe88 100644 --- a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +++ b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py @@ -1,4 +1,3 @@ -from ...relational.ModelBase import Base from ..get_vector_engine import get_vector_engine, get_vectordb_config from sqlalchemy import text From 13b79320b62cddeef6217665883cb8757e510132 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 14:54:20 +0100 Subject: [PATCH 04/20] fix: Resolve issue with gh actions not installing optional packages Add install of optional databases packages for database gh actions Fix COG-595 --- .github/workflows/test_neo4j.yml | 2 +- .github/workflows/test_qdrant.yml | 2 +- .github/workflows/test_weaviate.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_neo4j.yml b/.github/workflows/test_neo4j.yml index 0b47a55fc..69aca2473 100644 --- a/.github/workflows/test_neo4j.yml +++ b/.github/workflows/test_neo4j.yml @@ -46,7 +46,7 @@ jobs: installer-parallel: true - name: Install dependencies - run: poetry install --no-interaction + run: poetry install -E neo4j --no-interaction - name: Run default Neo4j env: diff --git a/.github/workflows/test_qdrant.yml b/.github/workflows/test_qdrant.yml index a6347bd0d..4291801f3 100644 --- a/.github/workflows/test_qdrant.yml +++ b/.github/workflows/test_qdrant.yml @@ -47,7 +47,7 @@ jobs: installer-parallel: true - name: Install dependencies - run: poetry install --no-interaction + run: poetry install -E qdrant --no-interaction - name: Run default Qdrant env: diff --git a/.github/workflows/test_weaviate.yml b/.github/workflows/test_weaviate.yml index 490f9075a..67968e6a6 100644 --- a/.github/workflows/test_weaviate.yml +++ b/.github/workflows/test_weaviate.yml @@ -47,7 +47,7 @@ jobs: installer-parallel: true - name: Install dependencies - run: poetry install --no-interaction + 
run: poetry install -E weaviate --no-interaction - name: Run default Weaviate env: From 63e687978ebd0300613035dd8094a3ded8686279 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 15:30:30 +0100 Subject: [PATCH 05/20] chore: Remove falkordb and posthog non optional dependency Switched falkordb and posthog to be optional dependencies Chore COG-595 --- poetry.lock | 12 +++++++----- pyproject.toml | 6 ++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 08098caad..e0848cbab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1485,7 +1485,7 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth name = "falkordb" version = "1.0.9" description = "Python client for interacting with FalkorDB database" -optional = false +optional = true python-versions = "<4.0,>=3.8" files = [ {file = "falkordb-1.0.9.tar.gz", hash = "sha256:177008e63c7e4d9ebbdfeb8cad24b0e49175bb0f6e96cac9b4ffb641c0eff0f1"}, @@ -3783,7 +3783,7 @@ mkdocstrings = ">=0.26" name = "monotonic" version = "1.6" description = "An implementation of time.monotonic() for Python 2 & < 3.3" -optional = false +optional = true python-versions = "*" files = [ {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, @@ -4780,7 +4780,7 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p name = "posthog" version = "3.7.0" description = "Integrate PostHog into any python application." 
-optional = false +optional = true python-versions = "*" files = [ {file = "posthog-3.7.0-py2.py3-none-any.whl", hash = "sha256:3555161c3a9557b5666f96d8e1f17f410ea0f07db56e399e336a1656d4e5c722"}, @@ -5763,7 +5763,7 @@ fastembed-gpu = ["fastembed-gpu (==0.3.6)"] name = "redis" version = "5.2.0" description = "Python client for Redis database and key-value store" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "redis-5.2.0-py3-none-any.whl", hash = "sha256:ae174f2bb3b1bf2b09d54bf3e51fbc1469cf6c10aa03e21141f51969801a7897"}, @@ -7630,15 +7630,17 @@ type = ["pytest-mypy"] [extras] cli = [] +falkordb = ["falkordb"] filesystem = ["botocore"] llama-index = ["llama-index-core"] neo4j = ["neo4j"] notebook = [] postgres = ["asyncpg", "pgvector", "psycopg2"] +posthog = ["posthog"] qdrant = ["qdrant-client"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "bf097b51f6147b82c63f67c34885981c2166b50755b0f3e690ab1c1e55fa56ee" +content-hash = "12148f1911ef4d74f01e6cfd7ee071ea8a9dfe435c55ab6124ac811669450e67" diff --git a/pyproject.toml b/pyproject.toml index b6fc10815..9bebc33b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ typing_extensions = "4.12.2" nest_asyncio = "1.6.0" numpy = "1.26.4" datasets = "3.1.0" -falkordb = "1.0.9" +falkordb = {version = "1.0.9", optional = true} boto3 = "^1.26.125" botocore="^1.35.54" gunicorn = "^20.1.0" @@ -56,7 +56,7 @@ tiktoken = "0.7.0" langchain_text_splitters = "0.3.2" langsmith = "0.1.139" langdetect = "1.0.9" -posthog = "^3.5.0" +posthog = {version = "^3.5.0", optional = true} lancedb = "0.15.0" litellm = "1.49.1" groq = "0.8.0" @@ -80,6 +80,8 @@ neo4j = ["neo4j"] postgres = ["psycopg2", "pgvector", "asyncpg"] notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] llama-index = ["llama-index-core"] +posthog = ["posthog"] +falkordb = ["falkordb"] 
[tool.poetry.group.dev.dependencies] From 836e3d29e178e8afe3fa39c5c52335eab6f93b80 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 16:24:12 +0100 Subject: [PATCH 06/20] chore: Update README.md Update README.md with database optional dependencies Chore COG-595 --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index 1cec3f051..d0b7ba778 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,24 @@ pip install cognee pip install 'cognee[postgres]' ``` +### With pip with Weaviate support + +```bash +pip install 'cognee[weaviate]' +``` + +### With pip with Qdrant support + +```bash +pip install 'cognee[qdrant]' +``` + +### With pip with Neo4j support + +```bash +pip install 'cognee[neo4j]' +``` + ### With poetry ```bash @@ -44,6 +62,24 @@ poetry add cognee poetry add cognee -E postgres ``` +### With poetry with Weaviate support + +```bash +poetry add cognee -E weaviate +``` + +### With poetry with Qdrant support + +```bash +poetry add cognee -E qdrant +``` + +### With poetry with Neo4j support + +```bash +poetry add cognee -E neo4j +``` + ## 💻 Basic Usage From ad56ff9c1aac775a5e3fb6d9329bfc38c11286f9 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 16:47:26 +0100 Subject: [PATCH 07/20] chore: Add langchain as optional dependency Added langchain as optional dependency Chore COG-595 --- poetry.lock | 136 ++++++++++++++++++++++++------------------------- pyproject.toml | 10 ++-- 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/poetry.lock b/poetry.lock index e0848cbab..7584a106d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -247,7 +247,7 @@ trio = ["trio (>=0.26.1)"] name = "appnope" version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, @@ -315,7 
+315,7 @@ tests = ["pytest"] name = "arrow" version = "1.3.0" description = "Better dates & times for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80"}, @@ -348,7 +348,7 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -optional = false +optional = true python-versions = "*" files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -381,7 +381,7 @@ wheel = ">=0.23.0,<1.0" name = "async-lru" version = "2.0.4" description = "Simple LRU cache for asyncio" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"}, @@ -570,7 +570,7 @@ typecheck = ["mypy"] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" -optional = false +optional = true python-versions = ">=3.6.0" files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, @@ -591,7 +591,7 @@ lxml = ["lxml"] name = "bleach" version = "6.2.0" description = "An easy safelist-based HTML-sanitizing tool." -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, @@ -889,7 +889,7 @@ files = [ name = "comm" version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, @@ -1226,7 +1226,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = false +optional = true python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1237,7 +1237,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, @@ -1471,7 +1471,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.1.0" description = "Get the currently executing AST node of a frame, and other information" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"}, @@ -1558,7 +1558,7 @@ sqlalchemy = {version = ">=2.0.0,<2.1.0", extras = ["asyncio"]} name = "fastjsonschema" version = "2.20.0" description = "Fastest Python implementation of JSON schema" -optional = false +optional = true python-versions = "*" files = [ {file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"}, @@ -1670,7 +1670,7 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] name = "fqdn" version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" -optional = false +optional = true python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" files = [ 
{file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"}, @@ -2483,7 +2483,7 @@ vertexai = ["google-cloud-aiplatform (>=1.53.0,<2.0.0)", "jsonref (>=1.1.0,<2.0. name = "ipykernel" version = "6.29.5" description = "IPython Kernel for Jupyter" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"}, @@ -2516,7 +2516,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio name = "ipython" version = "8.18.1" description = "IPython: Productive Interactive Computing" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, @@ -2553,7 +2553,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pa name = "isoduration" version = "20.11.0" description = "Operations with ISO 8601 durations" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042"}, @@ -2581,7 +2581,7 @@ colors = ["colorama (>=0.4.6)"] name = "jedi" version = "0.19.2" description = "An autocompletion tool for Python that can be used for text editors." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, @@ -2719,7 +2719,7 @@ files = [ name = "json5" version = "0.9.27" description = "A Python implementation of the JSON5 data format." 
-optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "json5-0.9.27-py3-none-any.whl", hash = "sha256:17b43d78d3a6daeca4d7030e9bf22092dba29b1282cc2d0cfa56f6febee8dc93"}, @@ -2733,7 +2733,7 @@ dev = ["build (==1.2.1)", "coverage (==7.5.3)", "mypy (==1.10.0)", "pip (==24.1) name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, @@ -2760,7 +2760,7 @@ ply = "*" name = "jsonpointer" version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, @@ -2814,7 +2814,7 @@ referencing = ">=0.31.0" name = "jupyter-client" version = "8.6.3" description = "Jupyter protocol implementation and client libraries" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"}, @@ -2837,7 +2837,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, @@ -2857,7 +2857,7 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout" name = "jupyter-events" version = "0.10.0" description = "Jupyter Event System library" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_events-0.10.0-py3-none-any.whl", hash = "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960"}, @@ -2882,7 +2882,7 @@ test = ["click", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "p name = "jupyter-lsp" version = "2.2.5" description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001"}, @@ -2897,7 +2897,7 @@ jupyter-server = ">=1.1.2" name = "jupyter-server" version = "2.14.2" description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_server-2.14.2-py3-none-any.whl", hash = "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd"}, @@ -2933,7 +2933,7 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0,<9)", "pytest-console name = "jupyter-server-terminals" version = "0.5.3" description = "A Jupyter Server Extension Providing Terminals." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa"}, @@ -2952,7 +2952,7 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (> name = "jupyterlab" version = "4.2.5" description = "JupyterLab computational environment" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyterlab-4.2.5-py3-none-any.whl", hash = "sha256:73b6e0775d41a9fee7ee756c80f58a6bed4040869ccc21411dc559818874d321"}, @@ -2987,7 +2987,7 @@ upgrade-extension = ["copier (>=9,<10)", "jinja2-time (<0.3)", "pydantic (<3.0)" name = "jupyterlab-pygments" version = "0.3.0" description = "Pygments theme using JupyterLab CSS variables" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"}, @@ -2998,7 +2998,7 @@ files = [ name = "jupyterlab-server" version = "2.27.3" description = "A set of server components for JupyterLab and JupyterLab like applications." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4"}, @@ -3181,7 +3181,7 @@ tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19,<=1.3.0 name = "langchain-core" version = "0.3.15" description = "Building applications with LLMs through composability" -optional = false +optional = true python-versions = "<4.0,>=3.9" files = [ {file = "langchain_core-0.3.15-py3-none-any.whl", hash = "sha256:3d4ca6dbb8ed396a6ee061063832a2451b0ce8c345570f7b086ffa7288e4fa29"}, @@ -3201,7 +3201,7 @@ typing-extensions = ">=4.7" name = "langchain-text-splitters" version = "0.3.2" description = "LangChain text splitting utilities" -optional = false +optional = true python-versions = "<4.0,>=3.9" files = [ {file = "langchain_text_splitters-0.3.2-py3-none-any.whl", hash = "sha256:0db28c53f41d1bc024cdb3b1646741f6d46d5371e90f31e7e7c9fbe75d01c726"}, @@ -3254,7 +3254,7 @@ openai = ["openai (>=0.27.8)"] name = "langsmith" version = "0.1.139" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
-optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ {file = "langsmith-0.1.139-py3-none-any.whl", hash = "sha256:2a4a541bfbd0a9727255df28a60048c85bc8c4c6a276975923785c3fd82dc879"}, @@ -3559,7 +3559,7 @@ dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setupto name = "matplotlib-inline" version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, @@ -3606,7 +3606,7 @@ files = [ name = "mistune" version = "3.0.2" description = "A sane and fast Markdown parser with useful plugins and renderers" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"}, @@ -3986,7 +3986,7 @@ files = [ name = "nbclient" version = "0.10.0" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "nbclient-0.10.0-py3-none-any.whl", hash = "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f"}, @@ -4008,7 +4008,7 @@ test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>= name = "nbconvert" version = "7.16.4" description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"}, @@ -4046,7 +4046,7 @@ webpdf = ["playwright"] name = "nbformat" version = "5.10.4" description = "The Jupyter Notebook format" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"}, @@ -4140,7 +4140,7 @@ twitter = ["twython"] name = "notebook" version = "7.2.2" description = "Jupyter Notebook - A web-based notebook environment for interactive computing" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"}, @@ -4163,7 +4163,7 @@ test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4 name = "notebook-shim" version = "0.2.4" description = "A shim layer for notebook traits and config" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef"}, @@ -4447,7 +4447,7 @@ xml = ["lxml (>=4.6.3)"] name = "pandocfilters" version = "1.5.1" description = "Utilities for writing pandoc filters in python" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"}, @@ -4458,7 +4458,7 @@ files = [ name = "parso" version = "0.8.4" description = "A Python Parser" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, @@ -4599,7 
+4599,7 @@ test = ["time-machine (>=2.6.0)"] name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." -optional = false +optional = true python-versions = "*" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, @@ -4803,7 +4803,7 @@ test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)" name = "prometheus-client" version = "0.21.0" description = "Python client for the Prometheus monitoring system." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "prometheus_client-0.21.0-py3-none-any.whl", hash = "sha256:4fa6b4dd0ac16d58bb587c04b1caae65b8c5043e85f778f42f5f632f6af2e166"}, @@ -4817,7 +4817,7 @@ twisted = ["twisted"] name = "prompt-toolkit" version = "3.0.48" description = "Library for building powerful interactive command lines in Python" -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, @@ -4958,7 +4958,7 @@ files = [ name = "psutil" version = "6.1.0" description = "Cross-platform lib for process and system monitoring in Python." 
-optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, @@ -5006,7 +5006,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = false +optional = true python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -5017,7 +5017,7 @@ files = [ name = "pure-eval" version = "0.2.3" description = "Safely evaluate AST nodes without side effects" -optional = false +optional = true python-versions = "*" files = [ {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, @@ -5467,7 +5467,7 @@ cli = ["click (>=5.0)"] name = "python-json-logger" version = "2.0.7" description = "A python library adding a json log formatter" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, @@ -5500,7 +5500,7 @@ files = [ name = "pywin32" version = "308" description = "Python for Window Extensions" -optional = false +optional = true python-versions = "*" files = [ {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, @@ -5527,7 +5527,7 @@ files = [ name = "pywinpty" version = "2.0.14" description = "Pseudo terminal support for Windows from Python." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f"}, @@ -5618,7 +5618,7 @@ pyyaml = "*" name = "pyzmq" version = "26.2.0" description = "Python bindings for 0MQ" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"}, @@ -5938,7 +5938,7 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"] name = "requests-toolbelt" version = "1.0.0" description = "A utility belt for advanced users of python-requests" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, @@ -5967,7 +5967,7 @@ types-setuptools = ">=69.1.0" name = "rfc3339-validator" version = "0.1.4" description = "A pure python RFC3339 validator" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, @@ -5981,7 +5981,7 @@ six = "*" name = "rfc3986-validator" version = "0.1.1" description = "Pure python rfc3986 validator" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ {file = "rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9"}, @@ -6256,7 +6256,7 @@ files = [ name = "send2trash" version = "1.8.3" description = "Send file to trash natively under Mac OS X, Windows and Linux" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ {file = 
"Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9"}, @@ -6510,7 +6510,7 @@ files = [ name = "soupsieve" version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, @@ -6619,7 +6619,7 @@ files = [ name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = false +optional = true python-versions = "*" files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, @@ -6671,7 +6671,7 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] name = "terminado" version = "0.18.1" description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0"}, @@ -6755,7 +6755,7 @@ blobfile = ["blobfile (>=2)"] name = "tinycss2" version = "1.4.0" description = "A tiny CSS parser" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"}, @@ -6924,7 +6924,7 @@ files = [ name = "tornado" version = "6.4.1" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, @@ -6965,7 +6965,7 @@ telegram = ["requests"] name = "traitlets" version = "5.14.3" description = "Traitlets Python configuration system" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, @@ -7020,7 +7020,7 @@ typing-extensions = ">=3.7.4.3" name = "types-python-dateutil" version = "2.9.0.20241003" description = "Typing stubs for python-dateutil" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, @@ -7079,7 +7079,7 @@ files = [ name = "uri-template" version = "1.3.0" description = "RFC 6570 URI Template Processor" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7"}, @@ -7200,7 +7200,7 @@ watchmedo = ["PyYAML (>=3.10)"] name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = false +optional = true python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, @@ -7232,7 +7232,7 @@ validators = "0.33.0" name = "webcolors" version = "24.11.1" description = "A library for working with the color formats defined by HTML and CSS." 
-optional = false +optional = true python-versions = ">=3.9" files = [ {file = "webcolors-24.11.1-py3-none-any.whl", hash = "sha256:515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9"}, @@ -7243,7 +7243,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -optional = false +optional = true python-versions = "*" files = [ {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, @@ -7254,7 +7254,7 @@ files = [ name = "websocket-client" version = "1.8.0" description = "WebSocket client for Python with low level API options" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, @@ -7629,9 +7629,9 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -cli = [] falkordb = ["falkordb"] filesystem = ["botocore"] +langchain = ["langchain_text_splitters", "langsmith"] llama-index = ["llama-index-core"] neo4j = ["neo4j"] notebook = [] @@ -7643,4 +7643,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "12148f1911ef4d74f01e6cfd7ee071ea8a9dfe435c55ab6124ac811669450e67" +content-hash = "c707b7e45a49a3c8c00b71292ca42898f2c5905f1e62bc7c9bc48d7423e9d990" diff --git a/pyproject.toml b/pyproject.toml index 9bebc33b6..019415ee1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,8 +53,8 @@ neo4j = {version = "^5.20.0", optional = true} jinja2 = "^3.1.3" matplotlib = "^3.8.3" tiktoken = "0.7.0" -langchain_text_splitters = "0.3.2" -langsmith = "0.1.139" +langchain_text_splitters = {version = "0.3.2", optional = true} +langsmith = {version = "0.1.139", optional = true} langdetect = "1.0.9" posthog = {version = "^3.5.0", optional = true} lancedb = 
"0.15.0" @@ -73,12 +73,12 @@ llama-index-core = {version = "^0.11.22", optional = true} [tool.poetry.extras] filesystem = ["s3fs", "botocore"] -cli = ["pipdeptree", "cron-descriptor"] weaviate = ["weaviate-client"] qdrant = ["qdrant-client"] neo4j = ["neo4j"] postgres = ["psycopg2", "pgvector", "asyncpg"] -notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] +notebook = ["notebook", "ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] +langchain = ["langsmith", "langchain_text_splitters"] llama-index = ["llama-index-core"] posthog = ["posthog"] falkordb = ["falkordb"] @@ -89,7 +89,7 @@ pytest = "^7.4.0" pytest-asyncio = "^0.21.1" coverage = "^7.3.2" mypy = "^1.7.1" -notebook = "^7.1.1" +notebook = {version = "^7.1.1", optional = true} deptry = "^0.20.0" debugpy = "1.8.2" pylint = "^3.0.3" From 1a5f0fe10d966480b8b0d983ab87c7459ab78553 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 19:07:17 +0100 Subject: [PATCH 08/20] chore: Update readme with more description about extras Update readme to be a bit more descriptive about installation Chore COG-595 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d0b7ba778..2b29f1448 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,9 @@ If you have questions, join our Discord ## 📦 Installation +You can install Cognee using either **pip** or **poetry**. +Support for various databases and vector stores is available through extras. 
+ ### With pip ```bash From dcb320da525eee644e675fa044f46ffdb335f0c1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 2 Dec 2024 19:35:50 +0100 Subject: [PATCH 09/20] chore: Add groq and langfuse as optional dependencies Added groq and langfuse as optional dependencies Chore COG-595 --- poetry.lock | 12 +++++++----- pyproject.toml | 6 ++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7584a106d..7d09c340d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -519,7 +519,7 @@ dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" -optional = false +optional = true python-versions = ">=3.7,<4.0" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, @@ -2022,7 +2022,7 @@ colorama = ">=0.4" name = "groq" version = "0.8.0" description = "The official Python library for the groq API" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "groq-0.8.0-py3-none-any.whl", hash = "sha256:f5e4e892d45001241a930db451e633ca1f0007e3f749deaa5d7360062fcd61e3"}, @@ -3229,7 +3229,7 @@ six = "*" name = "langfuse" version = "2.53.9" description = "A client library for accessing langfuse" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ {file = "langfuse-2.53.9-py3-none-any.whl", hash = "sha256:04363bc323f7513621c88a997003f7b906ae8f5d096bd54221cfcb6bf7a6f16a"}, @@ -7306,7 +7306,7 @@ files = [ name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." 
-optional = false +optional = true python-versions = ">=3.6" files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, @@ -7631,7 +7631,9 @@ type = ["pytest-mypy"] [extras] falkordb = ["falkordb"] filesystem = ["botocore"] +groq = ["groq"] langchain = ["langchain_text_splitters", "langsmith"] +langfuse = ["langfuse"] llama-index = ["llama-index-core"] neo4j = ["neo4j"] notebook = [] @@ -7643,4 +7645,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "c707b7e45a49a3c8c00b71292ca42898f2c5905f1e62bc7c9bc48d7423e9d990" +content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376" diff --git a/pyproject.toml b/pyproject.toml index 019415ee1..6fac2597c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,8 +59,8 @@ langdetect = "1.0.9" posthog = {version = "^3.5.0", optional = true} lancedb = "0.15.0" litellm = "1.49.1" -groq = "0.8.0" -langfuse = "^2.32.0" +groq = {version = "0.8.0", optional = true} +langfuse = {version = "^2.32.0", optional = true} pydantic-settings = "^2.2.1" anthropic = "^0.26.1" sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"} @@ -82,6 +82,8 @@ langchain = ["langsmith", "langchain_text_splitters"] llama-index = ["llama-index-core"] posthog = ["posthog"] falkordb = ["falkordb"] +groq = ["groq"] +langfuse = ["langfuse"] [tool.poetry.group.dev.dependencies] From 6841c83566dac3354d2fa127aa3c6fc9713e0586 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 2 Dec 2024 20:18:55 +0100 Subject: [PATCH 10/20] fix: fixes cognify duplicated edges and resets the methods to an older version --- .../graph/utils/get_graph_from_model.py | 151 ++++++++---------- .../utils/get_model_instance_from_graph.py | 42 ++--- 2 files changed, 83 insertions(+), 110 deletions(-) diff --git a/cognee/modules/graph/utils/get_graph_from_model.py 
b/cognee/modules/graph/utils/get_graph_from_model.py index 770e63d05..29137ddc7 100644 --- a/cognee/modules/graph/utils/get_graph_from_model.py +++ b/cognee/modules/graph/utils/get_graph_from_model.py @@ -1,16 +1,8 @@ from datetime import datetime, timezone - from cognee.infrastructure.engine import DataPoint from cognee.modules.storage.utils import copy_model - -def get_graph_from_model(data_point: DataPoint, added_nodes=None, added_edges=None): - - if not added_nodes: - added_nodes = {} - if not added_edges: - added_edges = {} - +def get_graph_from_model(data_point: DataPoint, include_root = True, added_nodes = {}, added_edges = {}): nodes = [] edges = [] @@ -20,92 +12,85 @@ def get_graph_from_model(data_point: DataPoint, added_nodes=None, added_edges=No for field_name, field_value in data_point: if field_name == "_metadata": continue - elif isinstance(field_value, DataPoint): - excluded_properties.add(field_name) - nodes, edges, added_nodes, added_edges = add_nodes_and_edges( - data_point, - field_name, - field_value, - nodes, - edges, - added_nodes, - added_edges, - ) - elif ( - isinstance(field_value, list) - and len(field_value) > 0 - and isinstance(field_value[0], DataPoint) - ): + if isinstance(field_value, DataPoint): excluded_properties.add(field_name) - for item in field_value: - n_edges_before = len(edges) - nodes, edges, added_nodes, added_edges = add_nodes_and_edges( - data_point, field_name, item, nodes, edges, added_nodes, added_edges - ) - edges = edges[:n_edges_before] + [ - (*edge[:3], {**edge[3], "metadata": {"type": "list"}}) - for edge in edges[n_edges_before:] - ] - else: - data_point_properties[field_name] = field_value + property_nodes, property_edges = get_graph_from_model(field_value, True, added_nodes, added_edges) - SimpleDataPointModel = copy_model( - type(data_point), - include_fields={ - "_metadata": (dict, data_point._metadata), - }, - exclude_fields=excluded_properties, - ) + for node in property_nodes: + if str(node.id) not in 
added_nodes: + nodes.append(node) + added_nodes[str(node.id)] = True - nodes.append(SimpleDataPointModel(**data_point_properties)) + for edge in property_edges: + edge_key = str(edge[0]) + str(edge[1]) + edge[2] - return nodes, edges + if str(edge_key) not in added_edges: + edges.append(edge) + added_edges[str(edge_key)] = True + for property_node in get_own_properties(property_nodes, property_edges): + edge_key = str(data_point.id) + str(property_node.id) + field_name -def add_nodes_and_edges( - data_point, field_name, field_value, nodes, edges, added_nodes, added_edges -): - - property_nodes, property_edges = get_graph_from_model( - field_value, dict(added_nodes), dict(added_edges) - ) - - for node in property_nodes: - if str(node.id) not in added_nodes: - nodes.append(node) - added_nodes[str(node.id)] = True - - for edge in property_edges: - edge_key = str(edge[0]) + str(edge[1]) + edge[2] - - if str(edge_key) not in added_edges: - edges.append(edge) - added_edges[str(edge_key)] = True - - for property_node in get_own_properties(property_nodes, property_edges): - edge_key = str(data_point.id) + str(property_node.id) + field_name - - if str(edge_key) not in added_edges: - edges.append( - ( - data_point.id, - property_node.id, - field_name, - { + if str(edge_key) not in added_edges: + edges.append((data_point.id, property_node.id, field_name, { "source_node_id": data_point.id, "target_node_id": property_node.id, "relationship_name": field_name, - "updated_at": datetime.now(timezone.utc).strftime( - "%Y-%m-%d %H:%M:%S" - ), - }, - ) - ) - added_edges[str(edge_key)] = True + "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + })) + added_edges[str(edge_key)] = True + continue - return (nodes, edges, added_nodes, added_edges) + if isinstance(field_value, list) and len(field_value) > 0 and isinstance(field_value[0], DataPoint): + excluded_properties.add(field_name) + + for item in field_value: + property_nodes, property_edges = 
get_graph_from_model(item, True, added_nodes, added_edges) + + for node in property_nodes: + if str(node.id) not in added_nodes: + nodes.append(node) + added_nodes[str(node.id)] = True + + for edge in property_edges: + edge_key = str(edge[0]) + str(edge[1]) + edge[2] + + if str(edge_key) not in added_edges: + edges.append(edge) + added_edges[edge_key] = True + + for property_node in get_own_properties(property_nodes, property_edges): + edge_key = str(data_point.id) + str(property_node.id) + field_name + + if str(edge_key) not in added_edges: + edges.append((data_point.id, property_node.id, field_name, { + "source_node_id": data_point.id, + "target_node_id": property_node.id, + "relationship_name": field_name, + "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + "metadata": { + "type": "list" + }, + })) + added_edges[edge_key] = True + continue + + data_point_properties[field_name] = field_value + + SimpleDataPointModel = copy_model( + type(data_point), + include_fields = { + "_metadata": (dict, data_point._metadata), + }, + exclude_fields = excluded_properties, + ) + + if include_root: + nodes.append(SimpleDataPointModel(**data_point_properties)) + + return nodes, edges def get_own_properties(property_nodes, property_edges): diff --git a/cognee/modules/graph/utils/get_model_instance_from_graph.py b/cognee/modules/graph/utils/get_model_instance_from_graph.py index 16658d743..82cdfa150 100644 --- a/cognee/modules/graph/utils/get_model_instance_from_graph.py +++ b/cognee/modules/graph/utils/get_model_instance_from_graph.py @@ -1,41 +1,29 @@ -from typing import Callable - from pydantic_core import PydanticUndefined - from cognee.infrastructure.engine import DataPoint from cognee.modules.storage.utils import copy_model -def get_model_instance_from_graph( - nodes: list[DataPoint], - edges: list[tuple[str, str, str, dict[str, str]]], - entity_id: str, -): - node_map = {node.id: node for node in nodes} +def get_model_instance_from_graph(nodes: 
list[DataPoint], edges: list, entity_id: str): + node_map = {} - for source_node_id, target_node_id, edge_label, edge_properties in edges: - source_node = node_map[source_node_id] - target_node = node_map[target_node_id] + for node in nodes: + node_map[node.id] = node + + for edge in edges: + source_node = node_map[edge[0]] + target_node = node_map[edge[1]] + edge_label = edge[2] + edge_properties = edge[3] if len(edge) == 4 else {} edge_metadata = edge_properties.get("metadata", {}) - edge_type = edge_metadata.get("type", "default") + edge_type = edge_metadata.get("type") if edge_type == "list": - NewModel = copy_model( - type(source_node), - {edge_label: (list[type(target_node)], PydanticUndefined)}, - ) - source_node_dict = source_node.model_dump() - source_node_edge_label_values = source_node_dict.get(edge_label, []) - source_node_dict[edge_label] = source_node_edge_label_values + [target_node] + NewModel = copy_model(type(source_node), { edge_label: (list[type(target_node)], PydanticUndefined) }) - node_map[source_node_id] = NewModel(**source_node_dict) + node_map[edge[0]] = NewModel(**source_node.model_dump(), **{ edge_label: [target_node] }) else: - NewModel = copy_model( - type(source_node), {edge_label: (type(target_node), PydanticUndefined)} - ) + NewModel = copy_model(type(source_node), { edge_label: (type(target_node), PydanticUndefined) }) - node_map[target_node_id] = NewModel( - **source_node.model_dump(), **{edge_label: target_node} - ) + node_map[edge[0]] = NewModel(**source_node.model_dump(), **{ edge_label: target_node }) return node_map[entity_id] From f65070087fcc0c1f7e6add9f9df545ca9f66b7b1 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 3 Dec 2024 03:40:28 -0500 Subject: [PATCH 11/20] Feature: Integrate Milvus as the VectorDatabase --- .../databases/vector/create_vector_engine.py | 43 ++- .../databases/vector/milvus/MilvusAdapter.py | 245 ++++++++++++++++++ .../databases/vector/milvus/__init__.py | 1 + cognee/tests/test_milvus.py | 76 
++++++ poetry.lock | 138 +++++++++- pyproject.toml | 1 + 6 files changed, 486 insertions(+), 18 deletions(-) create mode 100644 cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py create mode 100644 cognee/infrastructure/databases/vector/milvus/__init__.py create mode 100644 cognee/tests/test_milvus.py diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 4b4799ee7..5dda755f6 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -1,11 +1,13 @@ from typing import Dict + class VectorConfig(Dict): vector_db_url: str vector_db_port: str vector_db_key: str vector_db_provider: str + def create_vector_engine(config: VectorConfig, embedding_engine): if config["vector_db_provider"] == "weaviate": from .weaviate_db import WeaviateAdapter @@ -16,24 +18,37 @@ def create_vector_engine(config: VectorConfig, embedding_engine): return WeaviateAdapter( config["vector_db_url"], config["vector_db_key"], - embedding_engine = embedding_engine + embedding_engine=embedding_engine ) elif config["vector_db_provider"] == "qdrant": if not (config["vector_db_url"] and config["vector_db_key"]): raise EnvironmentError("Missing requred Qdrant credentials!") - + from .qdrant.QDrantAdapter import QDrantAdapter return QDrantAdapter( - url = config["vector_db_url"], - api_key = config["vector_db_key"], - embedding_engine = embedding_engine + url=config["vector_db_url"], + api_key=config["vector_db_key"], + embedding_engine=embedding_engine ) + elif config['vector_db_provider'] == 'milvus': + from .milvus.MilvusAdapter import MilvusAdapter + + if not config["vector_db_url"]: + raise EnvironmentError("Missing required Milvus credentials!") + + return MilvusAdapter( + url=config["vector_db_url"], + api_key=config['vector_db_key'], + embedding_engine=embedding_engine + ) + + elif config["vector_db_provider"] 
== "pgvector": from cognee.infrastructure.databases.relational import get_relational_config - + # Get configuration for postgres database relational_config = get_relational_config() db_username = relational_config.db_username @@ -52,8 +67,8 @@ def create_vector_engine(config: VectorConfig, embedding_engine): from .pgvector.PGVectorAdapter import PGVectorAdapter return PGVectorAdapter( - connection_string, - config["vector_db_key"], + connection_string, + config["vector_db_key"], embedding_engine, ) @@ -64,16 +79,16 @@ def create_vector_engine(config: VectorConfig, embedding_engine): from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter return FalkorDBAdapter( - database_url = config["vector_db_url"], - database_port = config["vector_db_port"], - embedding_engine = embedding_engine, + database_url=config["vector_db_url"], + database_port=config["vector_db_port"], + embedding_engine=embedding_engine, ) else: from .lancedb.LanceDBAdapter import LanceDBAdapter return LanceDBAdapter( - url = config["vector_db_url"], - api_key = config["vector_db_key"], - embedding_engine = embedding_engine, + url=config["vector_db_url"], + api_key=config["vector_db_key"], + embedding_engine=embedding_engine, ) diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py new file mode 100644 index 000000000..bfc0bbd18 --- /dev/null +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -0,0 +1,245 @@ +import asyncio +import logging +from typing import List, Optional +from uuid import UUID +from cognee.infrastructure.engine import DataPoint +from ..vector_db_interface import VectorDBInterface +from ..models.ScoredResult import ScoredResult +from ..embeddings.EmbeddingEngine import EmbeddingEngine +from pymilvus import MilvusClient + +logger = logging.getLogger("MilvusAdapter") + + +class IndexSchema(DataPoint): + text: str + + _metadata: dict = { + "index_fields": ["text"] + } + + 
+class MilvusAdapter(VectorDBInterface): + name = "Milvus" + url: str + api_key: Optional[str] + embedding_engine: EmbeddingEngine = None + + def __init__(self, url: str, api_key: Optional[str], embedding_engine: EmbeddingEngine): + self.url = url + self.api_key = api_key + + self.embedding_engine = embedding_engine + + def get_milvus_client(self) -> MilvusClient: + if self.api_key is not None: + client = MilvusClient(uri=self.url, token=self.api_key) + else: + client = MilvusClient(uri=self.url) + return client + + async def embed_data(self, data: List[str]) -> list[list[float]]: + return await self.embedding_engine.embed_text(data) + + async def has_collection(self, collection_name: str) -> bool: + future = asyncio.Future() + client = self.get_milvus_client() + future.set_result(client.has_collection(collection_name=collection_name)) + + return await future + + async def create_collection( + self, + collection_name: str, + payload_schema=None, + ): + from pymilvus import DataType, MilvusException + client = self.get_milvus_client() + if client.has_collection(collection_name=collection_name): + logger.info(f"Collection '{collection_name}' already exists.") + return True + + try: + dimension = self.embedding_engine.get_vector_size() + assert dimension > 0, "Embedding dimension must be greater than 0." 
+ + schema = client.create_schema( + auto_id=False, + enable_dynamic_field=False, + ) + + schema.add_field( + field_name="id", + datatype=DataType.VARCHAR, + is_primary=True, + max_length=36 + ) + + schema.add_field( + field_name="vector", + datatype=DataType.FLOAT_VECTOR, + dim=dimension + ) + + schema.add_field( + field_name="text", + datatype=DataType.VARCHAR, + max_length=60535 + ) + + index_params = client.prepare_index_params() + index_params.add_index( + field_name="vector", + metric_type="COSINE" + ) + + client.create_collection( + collection_name=collection_name, + schema=schema, + index_params=index_params + ) + + client.load_collection(collection_name) + + logger.info(f"Collection '{collection_name}' created successfully.") + return True + except MilvusException as e: + logger.error(f"Error creating collection '{collection_name}': {str(e)}") + raise e + + async def create_data_points( + self, + collection_name: str, + data_points: List[DataPoint] + ): + from pymilvus import MilvusException + client = self.get_milvus_client() + data_vectors = await self.embed_data( + [data_point.get_embeddable_data() for data_point in data_points] + ) + + insert_data = [ + { + "id": str(data_point.id), + "vector": data_vectors[index], + "text": data_point.text, + } + for index, data_point in enumerate(data_points) + ] + + try: + result = client.insert( + collection_name=collection_name, + data=insert_data + ) + logger.info( + f"Inserted {result.get('insert_count', 0)} data points into collection '{collection_name}'." 
+ ) + return result + except MilvusException as e: + logger.error(f"Error inserting data points into collection '{collection_name}': {str(e)}") + raise e + + async def create_vector_index(self, index_name: str, index_property_name: str): + await self.create_collection(f"{index_name}_{index_property_name}") + + async def index_data_points(self, index_name: str, index_property_name: str, data_points: List[DataPoint]): + formatted_data_points = [ + IndexSchema( + id=data_point.id, + text=getattr(data_point, data_point._metadata["index_fields"][0]), + ) + for data_point in data_points + ] + collection_name = f"{index_name}_{index_property_name}" + await self.create_data_points(collection_name, formatted_data_points) + + async def retrieve(self, collection_name: str, data_point_ids: list[str]): + from pymilvus import MilvusException + client = self.get_milvus_client() + try: + filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]""" + + results = client.query( + collection_name=collection_name, + expr=filter_expression, + output_fields=["*"], + ) + return results + except MilvusException as e: + logger.error(f"Error retrieving data points from collection '{collection_name}': {str(e)}") + raise e + + async def search( + self, + collection_name: str, + query_text: Optional[str] = None, + query_vector: Optional[List[float]] = None, + limit: int = 5, + with_vector: bool = False, + ): + from pymilvus import MilvusException + client = self.get_milvus_client() + if query_text is None and query_vector is None: + raise ValueError("One of query_text or query_vector must be provided!") + + try: + query_vector = query_vector or (await self.embed_data([query_text]))[0] + + output_fields = ["id", "text"] + if with_vector: + output_fields.append("vector") + + results = client.search( + collection_name=collection_name, + data=[query_vector], + anns_field="vector", + limit=limit, + output_fields=output_fields, + search_params={ + "metric_type": "COSINE", + }, + 
) + + return [ + ScoredResult( + id=UUID(result["id"]), + score=result["distance"], + payload=result.get("entity", {}), + ) + for result in results[0] + ] + except MilvusException as e: + logger.error(f"Error during search in collection '{collection_name}': {str(e)}") + raise e + + async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False): + def query_search(query_vector): + return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors) + + return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)] + + async def delete_data_points(self, collection_name: str, data_point_ids: list[str]): + from pymilvus import MilvusException + client = self.get_milvus_client() + try: + filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]""" + + delete_result = client.delete( + collection_name=collection_name, + filter=filter_expression + ) + + logger.info(f"Deleted data points with IDs {data_point_ids} from collection '{collection_name}'.") + return delete_result + except MilvusException as e: + logger.error(f"Error deleting data points from collection '{collection_name}': {str(e)}") + raise e + + async def prune(self): + client = self.get_milvus_client() + if client: + collections = client.list_collections() + for collection_name in collections: + client.drop_collection(collection_name=collection_name) + client.close() diff --git a/cognee/infrastructure/databases/vector/milvus/__init__.py b/cognee/infrastructure/databases/vector/milvus/__init__.py new file mode 100644 index 000000000..ecb3cb14b --- /dev/null +++ b/cognee/infrastructure/databases/vector/milvus/__init__.py @@ -0,0 +1 @@ +from .MilvusAdapter import MilvusAdapter diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py new file mode 100644 index 000000000..d565f6446 --- /dev/null +++ b/cognee/tests/test_milvus.py @@ -0,0 +1,76 @@ +import os 
+import logging +import pathlib +import cognee +from cognee.api.v1.search import SearchType + +logging.basicConfig(level=logging.DEBUG) + + +async def main(): + cognee.config.set_vector_db_provider("milvus") + data_directory_path = str( + pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_milvus")).resolve()) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve()) + cognee.config.system_root_directory(cognee_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + dataset_name = "cs_explanations" + + explanation_file_path = os.path.join(pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt") + await cognee.add([explanation_file_path], dataset_name) + + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. + Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. 
+ The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. + Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. + In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. 
Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. + """ + + await cognee.add([text], dataset_name) + + await cognee.cognify([dataset_name]) + + from cognee.infrastructure.databases.vector import get_vector_engine + vector_engine = get_vector_engine() + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] + random_node_name = random_node.payload["text"] + + search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\n\nExtracted INSIGHTS are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\n\nExtracted CHUNKS are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\nExtracted SUMMARIES are:\n") + for result in search_results: + print(f"{result}\n") + + history = await cognee.get_search_history() + assert len(history) == 6, "Search history is not correct." 
+ + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + milvus_client = get_vector_engine().get_milvus_client() + collections = milvus_client.list_collections() + assert len(collections) == 0, "Milvus vector database is not empty" + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) diff --git a/poetry.lock b/poetry.lock index 7d09c340d..4b8262648 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5" name = "grpcio" version = "1.67.1" description = "HTTP/2-based RPC framework" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, @@ -2751,6 +2751,8 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -3602,6 +3604,22 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] +[[package]] +name = "milvus-lite" +version = "2.4.10" +description = "A lightweight version of Milvus wrapped with Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, + {file = "milvus_lite-2.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74a8e07c5e3b057df17fbb46913388e84df1dc403a200f4e423799a58184c800"}, + {file = "milvus_lite-2.4.10-py3-none-manylinux2014_aarch64.whl", hash = "sha256:240c7386b747bad696ecb5bd1f58d491e86b9d4b92dccee3315ed7256256eddc"}, + {file = "milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl", hash = "sha256:211d2e334a043f9282bdd9755f76b9b2d93b23bffa7af240919ffce6a8dfe325"}, +] + +[package.dependencies] +tqdm = "*" + [[package]] name = "mistune" version = "3.0.2" @@ -4938,7 +4956,7 @@ files = [ name = "protobuf" version = "5.28.3" description = "" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"}, @@ -5360,6 +5378,31 @@ pyyaml = "*" [package.extras] extra = ["pygments (>=2.12)"] +[[package]] +name = "pymilvus" +version = "2.5.0" +description = "Python Sdk for Milvus" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"}, + {file = "pymilvus-2.5.0.tar.gz", hash = "sha256:4da14a3bd957a4921166f9355fd1f1ac5c5e4e80b46f12f64d9c9a6dcb8cb395"}, +] + +[package.dependencies] +grpcio = ">=1.49.1,<=1.67.1" +milvus-lite = {version = ">=2.4.0", markers = "sys_platform != \"win32\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +python-dotenv = ">=1.0.1,<2.0.0" +setuptools = ">69" +ujson = ">=2.0.0" + +[package.extras] +bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"] +dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov 
(>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] +model = ["milvus-model (>=0.1.0)"] + [[package]] name = "pyparsing" version = "3.2.0" @@ -7075,6 +7118,93 @@ files = [ {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] +[[package]] +name = "ujson" +version = "5.10.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, + {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"}, + {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"}, + {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"}, + {file = 
"ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"}, + {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"}, + {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"}, + {file = 
"ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"}, + {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"}, + {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"}, + {file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"}, + {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"}, + {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = 
"sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"}, + {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"}, + {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"}, + {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"}, + {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"}, +] + [[package]] name = "uri-template" version = "1.3.0" @@ -7645,4 +7775,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376" +content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3" diff --git a/pyproject.toml b/pyproject.toml index 6fac2597c..44ca875a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ asyncpg = {version = "0.30.0", optional = true} pgvector = {version = "^0.3.5", optional = true} psycopg2 = {version = "^2.9.10", optional = true} llama-index-core = {version = "^0.11.22", optional = true} +pymilvus = "^2.5.0" [tool.poetry.extras] filesystem = ["s3fs", "botocore"] From fb5f0cf00fdc1dff830531594c8e6c79504a2bdc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 10:37:50 +0100 Subject: [PATCH 12/20] chore: Make milvus an optional dependency Make Milvus an optional dependency, expand docs with Milvus information Chore --- .env.template | 2 +- README.md | 12 ++++++++++++ pyproject.toml | 4 ++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.env.template b/.env.template index ff591c0a7..75a57de4d 100644 --- a/.env.template +++ b/.env.template @@ -14,7 +14,7 
@@ GRAPH_DATABASE_URL= GRAPH_DATABASE_USERNAME= GRAPH_DATABASE_PASSWORD= -# "qdrant", "pgvector", "weaviate" or "lancedb" +# "qdrant", "pgvector", "weaviate", "milvus" or "lancedb" VECTOR_DB_PROVIDER="lancedb" # Not needed if using "lancedb" or "pgvector" VECTOR_DB_URL= diff --git a/README.md b/README.md index 2b29f1448..efb6e23ba 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,12 @@ pip install 'cognee[qdrant]' pip install 'cognee[neo4j]' ``` +### With pip with Milvus support + +```bash +pip install 'cognee[milvus]' +``` + ### With poetry ```bash @@ -83,6 +89,12 @@ poetry add cognee -E qdrant poetry add cognee -E neo4j ``` +### With poetry with Milvus support + +```bash +poetry add cognee -E milvus +``` + ## 💻 Basic Usage diff --git a/pyproject.toml b/pyproject.toml index 44ca875a6..c66b23c89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ asyncpg = {version = "0.30.0", optional = true} pgvector = {version = "^0.3.5", optional = true} psycopg2 = {version = "^2.9.10", optional = true} llama-index-core = {version = "^0.11.22", optional = true} -pymilvus = "^2.5.0" +pymilvus = {version = "^2.5.0", optional = true} [tool.poetry.extras] filesystem = ["s3fs", "botocore"] @@ -85,7 +85,7 @@ posthog = ["posthog"] falkordb = ["falkordb"] groq = ["groq"] langfuse = ["langfuse"] - +milvus = ["pymilvus"] [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" From 764c0895dfb884e1e054942d6d8af76b5ab57c16 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 11:13:54 +0100 Subject: [PATCH 13/20] fix: Resolve Milvus connection issue, add config to milvus test, add milvus gh action Resolved if statement resolution issue regarding api key, Added vector db config to milvus test, Added milvus gh action Fix --- .github/workflows/test_milvus.yml | 64 +++++++++++++++++++ .../databases/vector/milvus/MilvusAdapter.py | 8 ++- cognee/tests/test_milvus.py | 8 +++ poetry.lock | 17 +++-- 4 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 
.github/workflows/test_milvus.yml diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml new file mode 100644 index 000000000..2cfd88993 --- /dev/null +++ b/.github/workflows/test_milvus.yml @@ -0,0 +1,64 @@ +name: test | milvus + +on: + workflow_dispatch: + pull_request: + branches: + - main + types: [labeled, synchronize] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__LOG_LEVEL: ERROR + ENV: 'dev' + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_milvus: + name: test + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + defaults: + run: + shell: bash + + steps: + - name: Check out + uses: actions/checkout@master + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11.x' + + - name: Install Poetry + # https://github.com/snok/install-poetry#running-on-windows + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Install dependencies + run: poetry install -E milvus --no-interaction + + - name: Run default basic pipeline + env: + ENV: 'dev' + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: poetry run python ./cognee/tests/test_milvus.py + + - name: Clean up disk space + run: | + sudo rm -rf ~/.cache + sudo rm -rf /tmp/* + df -h diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index bfc0bbd18..d3774542a 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import asyncio import logging 
from typing import List, Optional @@ -6,7 +8,6 @@ from cognee.infrastructure.engine import DataPoint from ..vector_db_interface import VectorDBInterface from ..models.ScoredResult import ScoredResult from ..embeddings.EmbeddingEngine import EmbeddingEngine -from pymilvus import MilvusClient logger = logging.getLogger("MilvusAdapter") @@ -31,8 +32,9 @@ class MilvusAdapter(VectorDBInterface): self.embedding_engine = embedding_engine - def get_milvus_client(self) -> MilvusClient: - if self.api_key is not None: + def get_milvus_client(self) -> "MilvusClient": + from pymilvus import MilvusClient + if self.api_key: client = MilvusClient(uri=self.url, token=self.api_key) else: client = MilvusClient(uri=self.url) diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py index d565f6446..b32d3590b 100644 --- a/cognee/tests/test_milvus.py +++ b/cognee/tests/test_milvus.py @@ -16,6 +16,14 @@ async def main(): pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve()) cognee.config.system_root_directory(cognee_directory_path) + cognee.config.set_vector_db_config( + { + "vector_db_url": os.path.join(cognee_directory_path, "databases/milvus.db"), + "vector_db_key": "", + "vector_db_provider": "milvus" + } + ) + await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/poetry.lock b/poetry.lock index 4b8262648..3611dacea 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5" name = "grpcio" version = "1.67.1" description = "HTTP/2-based RPC framework" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, @@ -2751,8 +2751,6 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, - {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, - {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -3608,7 +3606,7 @@ files = [ name = "milvus-lite" version = "2.4.10" description = "A lightweight version of Milvus wrapped with Python." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, @@ -4956,7 +4954,7 @@ files = [ name = "protobuf" version = "5.28.3" description = "" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"}, @@ -5382,7 +5380,7 @@ extra = ["pygments (>=2.12)"] name = "pymilvus" version = "2.5.0" description = "Python Sdk for Milvus" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"}, @@ -7122,7 +7120,7 @@ files = [ name = "ujson" version = "5.10.0" description = "Ultra fast JSON encoder and decoder for Python" -optional = false +optional = true python-versions = 
">=3.8" files = [ {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, @@ -7765,6 +7763,7 @@ groq = ["groq"] langchain = ["langchain_text_splitters", "langsmith"] langfuse = ["langfuse"] llama-index = ["llama-index-core"] +milvus = ["pymilvus"] neo4j = ["neo4j"] notebook = [] postgres = ["asyncpg", "pgvector", "psycopg2"] @@ -7775,4 +7774,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3" +content-hash = "d6b10b74a910202f224ff34fa06ad3d2767796a6492a96724de0d608ac0356c5" From c301498da055ce3f93020c249756d78d0f1dd24b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 11:58:34 +0100 Subject: [PATCH 14/20] fix: Fix batch search function Rewrite batch search to work as async gather Fix --- .../databases/vector/milvus/MilvusAdapter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index d3774542a..84beb7273 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -216,10 +216,15 @@ class MilvusAdapter(VectorDBInterface): raise e async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False): - def query_search(query_vector): - return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors) + query_vectors = await self.embed_data(query_texts) - return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)] + return await asyncio.gather( + *[self.search(collection_name=collection_name, + query_vector=query_vector, + limit=limit, + with_vector=with_vectors, + ) for 
query_vector in query_vectors] + ) async def delete_data_points(self, collection_name: str, data_point_ids: list[str]): from pymilvus import MilvusException From e462ebe2e67f7368b1d59b826df4a4fba686ec1a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 13:38:07 +0100 Subject: [PATCH 15/20] docs: Update README.md with stable databases Update README.md with state of stable databases Docs --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index efb6e23ba..e57b498ab 100644 --- a/README.md +++ b/README.md @@ -368,12 +368,13 @@ pip install cognee } -| Name | Type | Current state | Known Issues | -|------------------|--------------------|-------------------|---------------------------------------| -| Qdrant | Vector | Stable ✅ | | -| Weaviate | Vector | Stable ✅ | | -| LanceDB | Vector | Stable ✅ | | -| Neo4j | Graph | Stable ✅ | | -| NetworkX | Graph | Stable ✅ | | -| FalkorDB | Vector/Graph | Unstable ❌ | | -| PGVector | Vector | Unstable ❌ | Postgres DB returns the Timeout error | +| Name | Type | Current state | Known Issues | +|----------|--------------------|-------------------|--------------| +| Qdrant | Vector | Stable ✅ | | +| Weaviate | Vector | Stable ✅ | | +| LanceDB | Vector | Stable ✅ | | +| Neo4j | Graph | Stable ✅ | | +| NetworkX | Graph | Stable ✅ | | +| FalkorDB | Vector/Graph | Unstable ❌ | | +| PGVector | Vector | Stable ✅ | | +| Milvus | Vector | Stable ✅ | | \ No newline at end of file From 18e0aa2f6c30c3bb89b21b35b75c1d577f4569e6 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:20:29 +0100 Subject: [PATCH 16/20] fix: deletes get_graph_from_model test of the faulty old implementation --- .../graph/get_graph_from_model_test.py | 89 ------------------- 1 file changed, 89 deletions(-) delete mode 100644 cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py diff --git 
a/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py b/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py deleted file mode 100644 index e56a2dff2..000000000 --- a/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py +++ /dev/null @@ -1,89 +0,0 @@ -from cognee.modules.graph.utils import get_graph_from_model -from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth - -CAR_SEDAN_EDGE = ( - "car1", - "sedan", - "is_type", - { - "source_node_id": "car1", - "target_node_id": "sedan", - "relationship_name": "is_type", - }, -) - - -BORIS_CAR_EDGE_GROUND_TRUTH = ( - "boris", - "car1", - "owns_car", - { - "source_node_id": "boris", - "target_node_id": "car1", - "relationship_name": "owns_car", - "metadata": {"type": "list"}, - }, -) - -CAR_TYPE_GROUND_TRUTH = {"id": "sedan"} - -CAR_GROUND_TRUTH = { - "id": "car1", - "brand": "Toyota", - "model": "Camry", - "year": 2020, - "color": "Blue", -} - -PERSON_GROUND_TRUTH = { - "id": "boris", - "name": "Boris", - "age": 30, - "driving_license": { - "issued_by": "PU Vrsac", - "issued_on": "2025-11-06", - "number": "1234567890", - "expires_on": "2025-11-06", - }, -} - - -def test_extracted_car_type(boris): - nodes, _ = get_graph_from_model(boris) - assert len(nodes) == 3 - car_type = nodes[0] - run_test_against_ground_truth("car_type", car_type, CAR_TYPE_GROUND_TRUTH) - - -def test_extracted_car(boris): - nodes, _ = get_graph_from_model(boris) - assert len(nodes) == 3 - car = nodes[1] - run_test_against_ground_truth("car", car, CAR_GROUND_TRUTH) - - -def test_extracted_person(boris): - nodes, _ = get_graph_from_model(boris) - assert len(nodes) == 3 - person = nodes[2] - run_test_against_ground_truth("person", person, PERSON_GROUND_TRUTH) - - -def test_extracted_car_sedan_edge(boris): - _, edges = get_graph_from_model(boris) - edge = edges[0] - - assert CAR_SEDAN_EDGE[:3] == edge[:3], f"{CAR_SEDAN_EDGE[:3] = } != {edge[:3] = }" - for key, ground_truth in 
CAR_SEDAN_EDGE[3].items(): - assert ground_truth == edge[3][key], f"{ground_truth = } != {edge[3][key] = }" - - -def test_extracted_boris_car_edge(boris): - _, edges = get_graph_from_model(boris) - edge = edges[1] - - assert ( - BORIS_CAR_EDGE_GROUND_TRUTH[:3] == edge[:3] - ), f"{BORIS_CAR_EDGE_GROUND_TRUTH[:3] = } != {edge[:3] = }" - for key, ground_truth in BORIS_CAR_EDGE_GROUND_TRUTH[3].items(): - assert ground_truth == edge[3][key], f"{ground_truth = } != {edge[3][key] = }" From 688c3dfdb7094340269b5a221ca374d67b5778d2 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:33:46 +0100 Subject: [PATCH 17/20] Fix: deletes test that were implemented the faulty get_model_from_graph method --- .../get_graph_from_model_generative_test.py | 37 ------------------- ...del_instance_from_graph_generative_test.py | 33 ----------------- .../get_model_instance_from_graph_test.py | 35 ------------------ 3 files changed, 105 deletions(-) delete mode 100644 cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py delete mode 100644 cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py delete mode 100644 cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py diff --git a/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py b/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py deleted file mode 100644 index dec751f89..000000000 --- a/cognee/tests/unit/interfaces/graph/get_graph_from_model_generative_test.py +++ /dev/null @@ -1,37 +0,0 @@ -import warnings - -import pytest - -from cognee.modules.graph.utils import get_graph_from_model -from cognee.tests.unit.interfaces.graph.util import ( - PERSON_NAMES, - count_society, - create_organization_recursive, -) - - -@pytest.mark.parametrize("recursive_depth", [1, 2, 3]) -def test_society_nodes_and_edges(recursive_depth): - import sys - - if sys.version_info[0] == 3 and 
sys.version_info[1] >= 11: - society = create_organization_recursive( - "society", "Society", PERSON_NAMES, recursive_depth - ) - - n_organizations, n_persons = count_society(society) - society_counts_total = n_organizations + n_persons - - nodes, edges = get_graph_from_model(society) - - assert ( - len(nodes) == society_counts_total - ), f"{society_counts_total = } != {len(nodes) = }, not all DataPoint instances were found" - - assert len(edges) == ( - len(nodes) - 1 - ), f"{(len(nodes) - 1) = } != {len(edges) = }, there have to be n_nodes - 1 edges, as each node has exactly one parent node, except for the root node" - else: - warnings.warn( - "The recursive pydantic data structure cannot be reconstructed from the graph because the 'inner' pydantic class is not defined. Hence this test is skipped. This problem is solved in Python 3.11" - ) diff --git a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py deleted file mode 100644 index dd5e19469..000000000 --- a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_generative_test.py +++ /dev/null @@ -1,33 +0,0 @@ -import warnings - -import pytest - -from cognee.modules.graph.utils import ( - get_graph_from_model, - get_model_instance_from_graph, -) -from cognee.tests.unit.interfaces.graph.util import ( - PERSON_NAMES, - create_organization_recursive, - show_first_difference, -) - - -@pytest.mark.parametrize("recursive_depth", [1, 2, 3]) -def test_society_nodes_and_edges(recursive_depth): - import sys - - if sys.version_info[0] == 3 and sys.version_info[1] >= 11: - society = create_organization_recursive( - "society", "Society", PERSON_NAMES, recursive_depth - ) - nodes, edges = get_graph_from_model(society) - parsed_society = get_model_instance_from_graph(nodes, edges, "society") - - assert str(society) == (str(parsed_society)), show_first_difference( - str(society), str(parsed_society), 
"society", "parsed_society" - ) - else: - warnings.warn( - "The recursive pydantic data structure cannot be reconstructed from the graph because the 'inner' pydantic class is not defined. Hence this test is skipped. This problem is solved in Python 3.11" - ) diff --git a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py deleted file mode 100644 index f1aa7736d..000000000 --- a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py +++ /dev/null @@ -1,35 +0,0 @@ -from cognee.modules.graph.utils import ( - get_graph_from_model, - get_model_instance_from_graph, -) -from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth - -PARSED_PERSON_GROUND_TRUTH = { - "id": "boris", - "name": "Boris", - "age": 30, - "driving_license": { - "issued_by": "PU Vrsac", - "issued_on": "2025-11-06", - "number": "1234567890", - "expires_on": "2025-11-06", - }, -} - -CAR_GROUND_TRUTH = { - "id": "car1", - "brand": "Toyota", - "model": "Camry", - "year": 2020, - "color": "Blue", -} - - -def test_parsed_person(boris): - nodes, edges = get_graph_from_model(boris) - parsed_person = get_model_instance_from_graph(nodes, edges, "boris") - - run_test_against_ground_truth( - "parsed_person", parsed_person, PARSED_PERSON_GROUND_TRUTH - ) - run_test_against_ground_truth("car", parsed_person.owns_car[0], CAR_GROUND_TRUTH) From 57f319fb32c1b271684358cd13706c41f0da33d8 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:44:49 +0100 Subject: [PATCH 18/20] fix: deletes dummy data of faulty tests --- .../tests/unit/interfaces/graph/conftest.py | 68 -------- cognee/tests/unit/interfaces/graph/util.py | 150 ------------------ 2 files changed, 218 deletions(-) delete mode 100644 cognee/tests/unit/interfaces/graph/conftest.py delete mode 100644 cognee/tests/unit/interfaces/graph/util.py diff --git 
a/cognee/tests/unit/interfaces/graph/conftest.py b/cognee/tests/unit/interfaces/graph/conftest.py deleted file mode 100644 index 45f977bd6..000000000 --- a/cognee/tests/unit/interfaces/graph/conftest.py +++ /dev/null @@ -1,68 +0,0 @@ -from enum import Enum -from typing import Optional - -import pytest - -from cognee.infrastructure.engine import DataPoint - - -class CarTypeName(Enum): - Pickup = "Pickup" - Sedan = "Sedan" - SUV = "SUV" - Coupe = "Coupe" - Convertible = "Convertible" - Hatchback = "Hatchback" - Wagon = "Wagon" - Minivan = "Minivan" - Van = "Van" - - -class CarType(DataPoint): - id: str - name: CarTypeName - _metadata: dict = dict(index_fields=["name"]) - - -class Car(DataPoint): - id: str - brand: str - model: str - year: int - color: str - is_type: CarType - - -class Person(DataPoint): - id: str - name: str - age: int - owns_car: list[Car] - driving_license: Optional[dict] - _metadata: dict = dict(index_fields=["name"]) - - -@pytest.fixture(scope="function") -def boris(): - boris = Person( - id="boris", - name="Boris", - age=30, - owns_car=[ - Car( - id="car1", - brand="Toyota", - model="Camry", - year=2020, - color="Blue", - is_type=CarType(id="sedan", name=CarTypeName.Sedan), - ) - ], - driving_license={ - "issued_by": "PU Vrsac", - "issued_on": "2025-11-06", - "number": "1234567890", - "expires_on": "2025-11-06", - }, - ) - return boris diff --git a/cognee/tests/unit/interfaces/graph/util.py b/cognee/tests/unit/interfaces/graph/util.py deleted file mode 100644 index a20bdb3e4..000000000 --- a/cognee/tests/unit/interfaces/graph/util.py +++ /dev/null @@ -1,150 +0,0 @@ -import random -import string -from datetime import datetime, timezone -from typing import Any, Dict, Optional - -from cognee.infrastructure.engine import DataPoint - - -def run_test_against_ground_truth( - test_target_item_name: str, test_target_item: Any, ground_truth_dict: Dict[str, Any] -): - """Validates test target item attributes against ground truth values. 
- - Args: - test_target_item_name: Name of the item being tested (for error messages) - test_target_item: Object whose attributes are being validated - ground_truth_dict: Dictionary containing expected values - - Raises: - AssertionError: If any attribute doesn't match ground truth or if update timestamp is too old - """ - for key, ground_truth in ground_truth_dict.items(): - if isinstance(ground_truth, dict): - for key2, ground_truth2 in ground_truth.items(): - assert ( - ground_truth2 == getattr(test_target_item, key)[key2] - ), f"{test_target_item_name}/{key = }/{key2 = }: {ground_truth2 = } != {getattr(test_target_item, key)[key2] = }" - elif isinstance(ground_truth, list): - raise NotImplementedError("Currently not implemented for 'list'") - else: - assert ground_truth == getattr( - test_target_item, key - ), f"{test_target_item_name}/{key = }: {ground_truth = } != {getattr(test_target_item, key) = }" - time_delta = datetime.now(timezone.utc) - getattr(test_target_item, "updated_at") - - assert time_delta.total_seconds() < 60, f"{ time_delta.total_seconds() = }" - - -class Organization(DataPoint): - id: str - name: str - members: Optional[list["SocietyPerson"]] - - -class SocietyPerson(DataPoint): - id: str - name: str - memberships: Optional[list[Organization]] - - -SocietyPerson.model_rebuild() -Organization.model_rebuild() - - -ORGANIZATION_NAMES = [ - "ChessClub", - "RowingClub", - "TheatreTroupe", - "PoliticalParty", - "Charity", - "FanClub", - "FilmClub", - "NeighborhoodGroup", - "LocalCouncil", - "Band", -] -PERSON_NAMES = ["Sarah", "Anna", "John", "Sam"] - - -def create_society_person_recursive(id, name, organization_names, max_depth, depth=0): - id_suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(10)) - - if depth < max_depth: - memberships = [ - create_organization_recursive( - f"{org_name}-{depth}-{id_suffix}", - org_name.lower(), - PERSON_NAMES, - max_depth, - depth + 1, - ) - for org_name in organization_names - ] - else: - 
memberships = None - - return SocietyPerson(id=id, name=f"{name}{depth}", memberships=memberships) - - -def create_organization_recursive(id, name, member_names, max_depth, depth=0): - id_suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(10)) - - if depth < max_depth: - members = [ - create_society_person_recursive( - f"{member_name}-{depth}-{id_suffix}", - member_name.lower(), - ORGANIZATION_NAMES, - max_depth, - depth + 1, - ) - for member_name in member_names - ] - else: - members = None - - return Organization(id=id, name=f"{name}{depth}", members=members) - - -def count_society(obj): - if isinstance(obj, SocietyPerson): - if obj.memberships is not None: - organization_counts, society_person_counts = zip( - *[count_society(organization) for organization in obj.memberships] - ) - organization_count = sum(organization_counts) - society_person_count = sum(society_person_counts) + 1 - return (organization_count, society_person_count) - else: - return (0, 1) - if isinstance(obj, Organization): - if obj.members is not None: - organization_counts, society_person_counts = zip( - *[count_society(organization) for organization in obj.members] - ) - organization_count = sum(organization_counts) + 1 - society_person_count = sum(society_person_counts) - return (organization_count, society_person_count) - else: - return (1, 0) - else: - raise Exception("Not allowed") - - -def show_first_difference(str1, str2, str1_name, str2_name, context=30): - for i, (c1, c2) in enumerate(zip(str1, str2)): - if c1 != c2: - start = max(0, i - context) - end1 = min(len(str1), i + context + 1) - end2 = min(len(str2), i + context + 1) - if i > 0: - return f"identical: '{str1[start:i-1]}' | {str1_name}: '{str1[i-1:end1]}'... != {str2_name}: '{str2[i-1:end2]}'..." - else: - return f"{str1_name} and {str2_name} have no overlap in characters" - if len(str1) > len(str2): - return f"{str2_name} is identical up to the {i}th character, missing afterwards '{str1[i:i+context]}'..." 
- if len(str2) > len(str1): - return f"{str1_name} is identical up to the {i}th character, missing afterwards '{str2[i:i+context]}'..." - else: - return f"{str1_name} and {str2_name} are identical." From 1d18dd2f18b9a4566281981c216c377eafac29d2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 17:33:18 +0100 Subject: [PATCH 19/20] docs: Update README.md to have less clutter Resolve clutter in README.md docs --- README.md | 70 ++++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index e57b498ab..dc43a65c0 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ If you have questions, join our Discord ## 📦 Installation -You can install Cognee using either **pip** or **poetry**. +You can install Cognee using either **pip** or **poetry**. Support for various databases and vector stores is available through extras. ### With pip @@ -29,73 +29,49 @@ Support for various databases and vector stores is available through extras. pip install cognee ``` -### With pip with PostgreSQL support - -```bash -pip install 'cognee[postgres]' -``` - -### With pip with Weaviate support - -```bash -pip install 'cognee[weaviate]' -``` - -### With pip with Qdrant support - -```bash -pip install 'cognee[qdrant]' -``` - -### With pip with Neo4j support - -```bash -pip install 'cognee[neo4j]' -``` - -### With pip with Milvus support - -```bash -pip install 'cognee[milvus]' -``` - ### With poetry ```bash poetry add cognee ``` -### With poetry with PostgreSQL support +### With pip with specific database support +To install Cognee with support for specific databases use the appropriate command below. Replace \ with the name of the database you need. 
```bash -poetry add cognee -E postgres +pip install 'cognee[]' ``` -### With poetry with Weaviate support +Replace \ with any of the following databases: +- postgres +- weaviate +- qdrant +- neo4j +- milvus +For example with postgres and neo4j support: ```bash -poetry add cognee -E weaviate +pip install 'cognee[postgres, neo4j]' ``` -### With poetry with Qdrant support +### With poetry with specific database support +To install Cognee with support for specific databases use the appropriate command below. Replace \ with the name of the database you need. ```bash -poetry add cognee -E qdrant +poetry add cognee -E ``` +Replace \ with any of the following databases: +- postgres +- weaviate +- qdrant +- neo4j +- milvus -### With poetry with Neo4j support - +For example with postgres and neo4j support: ```bash -poetry add cognee -E neo4j +poetry add cognee -E postgres -E neo4j ``` -### With poetry with Milvus support - -```bash -poetry add cognee -E milvus -``` - - ## 💻 Basic Usage ### Setup From 50e9b81aae61fa1218371315b276af9f40a18be5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 17:38:31 +0100 Subject: [PATCH 20/20] docs: Better wording for README.md Better wording for part of README.md docs --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dc43a65c0..f0aa2a280 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Replace \ with any of the following databases: - neo4j - milvus -For example with postgres and neo4j support: +Installing Cognee with PostgreSQL and Neo4j support example: ```bash pip install 'cognee[postgres, neo4j]' ``` @@ -67,7 +67,7 @@ Replace \ with any of the following databases: - neo4j - milvus -For example with postgres and neo4j support: +Installing Cognee with PostgreSQL and Neo4j support example: ```bash poetry add cognee -E postgres -E neo4j ```