From 3b07f3c08d580319351b854d1cec179749fbe3ef Mon Sep 17 00:00:00 2001
From: Hande <159312713+hande-k@users.noreply.github.com>
Date: Fri, 16 May 2025 09:30:47 +0200
Subject: [PATCH] feat: Test db examples (#817)

## Description
Adds runnable example scripts under `examples/database_examples/` for ChromaDB, FalkorDB, Kuzu, Milvus, Neo4j, PGVector, Qdrant, and Weaviate, plus a reusable GitHub Actions workflow (`db_examples_tests.yml`) that runs the Neo4j, Kuzu, Milvus, Weaviate, Qdrant, and PGVector examples in CI, wired into `test_suites.yml`. Also hardens the Milvus and Qdrant adapters to return empty results for missing collections and non-positive limits, awaits the async `exists` check in the Weaviate adapter, fixes an invalid `AttributeError(message=...)` call in the config module, and gives the brute-force triplet search a concrete result limit.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris
Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
---
 .github/workflows/db_examples_tests.yml      | 269 ++++++++++++++++++
 .github/workflows/test_suites.yml            |   8 +
 cognee/api/v1/config/config.py               |   2 +-
 .../databases/vector/milvus/MilvusAdapter.py |  25 +-
 .../databases/vector/qdrant/QDrantAdapter.py |  14 +-
 .../vector/weaviate_db/WeaviateAdapter.py    |   2 +-
 .../utils/brute_force_triplet_search.py      |   2 +-
 .../database_examples/chromadb_example.py    |  87 ++++++
 .../database_examples/falkordb_example.py    |  87 ++++++
 examples/database_examples/kuzu_example.py   |  85 ++++++
 examples/database_examples/milvus_example.py |  89 ++++++
 examples/database_examples/neo4j_example.py  |  94 ++++++
 .../database_examples/pgvector_example.py    |  99 +++++++
 examples/database_examples/qdrant_example.py |  93 ++++++
 .../database_examples/weaviate_example.py    |  92 ++++++
 15 files changed, 1039 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/db_examples_tests.yml
 create mode 100644 examples/database_examples/chromadb_example.py
 create mode 100644 examples/database_examples/falkordb_example.py
 create mode 100644 examples/database_examples/kuzu_example.py
 create mode 100644 examples/database_examples/milvus_example.py
 create mode 100644 examples/database_examples/neo4j_example.py
 create mode 100644 examples/database_examples/pgvector_example.py
 create mode 100644 examples/database_examples/qdrant_example.py
 create mode 100644 examples/database_examples/weaviate_example.py

diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml
new file mode 100644
index 000000000..f5379eefc
--- /dev/null
+++ b/.github/workflows/db_examples_tests.yml
@@ -0,0 +1,269 @@
+name: Reusable DB Examples Tests
+
+on:
+  workflow_call:
+    inputs:
+      databases:
+        required: false
+        type: string
+        default: "all"
+        description: "Which databases to run (comma-separated or 'all')"
+      python-version:
+        required: false
+        type: string
+        default: "3.11.x"
+    secrets:
+      LLM_MODEL:
+        required: true
+      LLM_ENDPOINT:
+        required: true
+      LLM_API_KEY:
+        required: true
+      LLM_API_VERSION:
+        required: true
+      EMBEDDING_MODEL:
+        required: true
+      EMBEDDING_ENDPOINT:
+        required: true
+      EMBEDDING_API_KEY:
+        required: true
+      EMBEDDING_API_VERSION:
+        required: true
+      QDRANT_API_URL:
+        required: false
+      QDRANT_API_KEY:
+        required: false
+      WEAVIATE_API_URL:
+        required: false
+      WEAVIATE_API_KEY:
+        required: false
+      POSTGRES_PASSWORD:
+        required: false
+      NEO4J_API_URL:
+        required: false
+      NEO4J_API_KEY:
+        required: false
+
+jobs:
+  run-db-example-neo4j:
+    name: "Neo4j DB Example Test"
+    runs-on: ubuntu-22.04
+    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j') }}
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Install Neo4j extra
+        run: |
+          poetry install -E neo4j
+
+      - name: Run Neo4j Example
+        env:
+          ENV: dev
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: "neo4j" + GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }} + GRAPH_DATABASE_USERNAME: "neo4j" + GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} + run: | + poetry run python examples/database_examples/neo4j_example.py + + run-db-example-kuzu: + name: "Kuzu DB Example Test" + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu') }} + + steps: + - name: Check out + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + + - name: Install Kuzu extra + run: | + poetry install -E kuzu + + - name: Run Kuzu Example + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: "kuzu" + run: | + poetry run python examples/database_examples/kuzu_example.py + + run-db-example-milvus: + name: "Milvus DB Example Test" + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'milvus') }} + + steps: + - name: Check out + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + + - name: Install Milvus extra + run: | + poetry install -E milvus + + - name: Run Milvus Example + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: | + poetry run python examples/database_examples/milvus_example.py + + run-db-example-weaviate: + name: "Weaviate DB Example Test" + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'weaviate') }} + steps: + - name: Check out + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + + - name: Install Weaviate extra + run: | + poetry install -E weaviate + + - name: Run Weaviate Example + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }} + VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }} + run: | + poetry run python examples/database_examples/weaviate_example.py + + run-db-example-qdrant: + name: "Qdrant DB Example Test" + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 
'all' || contains(inputs.databases, 'qdrant') }}
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Install Qdrant extra
+        run: |
+          poetry install -E qdrant
+
+      - name: Run Qdrant Example
+        env:
+          ENV: dev
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
+          VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
+        run: |
+          poetry run python examples/database_examples/qdrant_example.py
+
+  run-db-example-pgvector:
+    name: "PostgreSQL PGVector DB Example Test"
+    runs-on: ubuntu-22.04
+    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'postgres') }}
+    services:
+      postgres:
+        image: pgvector/pgvector:pg17
+        env:
+          POSTGRES_USER: cognee
+          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
+          POSTGRES_DB: cognee_db
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Install PGVector extra
+        run: |
+          poetry install -E postgres
+
+      - name: Run PGVector Example
+        env:
+          ENV: dev
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: |
+          poetry run python examples/database_examples/pgvector_example.py
+
+
\ No newline at end of file
diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml
index 05e6d1f80..d0bd1ffcb 100644
--- a/.github/workflows/test_suites.yml
+++ b/.github/workflows/test_suites.yml
@@ -77,6 +77,12 @@ jobs:
     uses: ./.github/workflows/examples_tests.yml
     secrets: inherit
 
+  db-examples-tests:
+    name: DB Examples Tests
+    needs: [vector-db-tests]
+    uses: ./.github/workflows/db_examples_tests.yml
+    secrets: inherit
+
   # Additional LLM tests
   gemini-tests:
     name: Gemini Tests
@@ -113,6 +119,7 @@
       python-version-tests,
       vector-db-tests,
       example-tests,
+      db-examples-tests,
       gemini-tests,
       ollama-tests,
       relational-db-migration-tests,
@@ -131,6 +138,7 @@
           "${{ needs.python-version-tests.result }}" == "success" &&
           "${{ needs.vector-db-tests.result }}" == "success" &&
           "${{ needs.example-tests.result }}" == "success" &&
+          "${{ needs.db-examples-tests.result }}" == "success" &&
           "${{ needs.relational-db-migration-tests.result }}" == "success" &&
           "${{ needs.gemini-tests.result }}" == "success" &&
          "${{ needs.docker-compose-test.result }}" == "success" &&
diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py
index a24a7bb92..27b3e4f04 100644
--- a/cognee/api/v1/config/config.py
+++ b/cognee/api/v1/config/config.py
@@ -155,7 +155,7 @@ class config:
             if 
hasattr(graph_db_config, key): object.__setattr__(graph_db_config, key, value) else: - raise AttributeError(message=f"'{key}' is not a valid attribute of the config.") + raise AttributeError(f"'{key}' is not a valid attribute of the config.") @staticmethod def set_vector_db_config(config_dict: dict): diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index 9daebb760..16da8dbb0 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -178,10 +178,18 @@ class MilvusAdapter(VectorDBInterface): ): from pymilvus import MilvusException, exceptions + if limit <= 0: + return [] client = self.get_milvus_client() if query_text is None and query_vector is None: raise ValueError("One of query_text or query_vector must be provided!") + if not client.has_collection(collection_name=collection_name): + logger.warning( + f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []." + ) + return [] + try: query_vector = query_vector or (await self.embed_data([query_text]))[0] @@ -208,12 +216,19 @@ class MilvusAdapter(VectorDBInterface): ) for result in results[0] ] - except exceptions.CollectionNotExistException as error: - raise CollectionNotFoundError( - f"Collection '{collection_name}' does not exist!" - ) from error + except exceptions.CollectionNotExistException: + logger.warning( + f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []." + ) + return [] except MilvusException as e: - logger.error(f"Error during search in collection '{collection_name}': {str(e)}") + # Catch other Milvus errors that are "collection not found" (paranoid safety) + if "collection not found" in str(e).lower() or "schema" in str(e).lower(): + logger.warning( + f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []." + ) + return [] + logger.error(f"Error searching Milvus collection '{collection_name}': {e}") raise e async def batch_search( diff --git a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py index 72d66e2de..951f73e6e 100644 --- a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +++ b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py @@ -159,12 +159,24 @@ class QDrantAdapter(VectorDBInterface): query_vector: Optional[List[float]] = None, limit: int = 15, with_vector: bool = False, - ): + ) -> List[ScoredResult]: from qdrant_client.http.exceptions import UnexpectedResponse if query_text is None and query_vector is None: raise InvalidValueError(message="One of query_text or query_vector must be provided!") + if limit <= 0: + return [] + + if not await self.has_collection(collection_name): + logger.warning( + f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []." 
+ ) + return [] + + if query_vector is None: + query_vector = (await self.embed_data([query_text]))[0] + try: client = self.get_qdrant_client() diff --git a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py index dac6bc83f..f0b3497cd 100644 --- a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +++ b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py @@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface): # ) else: data_point: DataObject = data_points[0] - if collection.data.exists(data_point.uuid): + if await collection.data.exists(data_point.uuid): return await collection.data.update( uuid=data_point.uuid, vector=data_point.vector, diff --git a/cognee/modules/retrieval/utils/brute_force_triplet_search.py b/cognee/modules/retrieval/utils/brute_force_triplet_search.py index 0a08fbd00..2e6775e64 100644 --- a/cognee/modules/retrieval/utils/brute_force_triplet_search.py +++ b/cognee/modules/retrieval/utils/brute_force_triplet_search.py @@ -146,7 +146,7 @@ async def brute_force_search( async def search_in_collection(collection_name: str): try: return await vector_engine.search( - collection_name=collection_name, query_text=query, limit=0 + collection_name=collection_name, query_text=query, limit=50 ) except CollectionNotFoundError: return [] diff --git a/examples/database_examples/chromadb_example.py b/examples/database_examples/chromadb_example.py new file mode 100644 index 000000000..defa7f78d --- /dev/null +++ b/examples/database_examples/chromadb_example.py @@ -0,0 +1,87 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with ChromaDB + + This example: + 1. Configures Cognee to use ChromaDB as vector database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + # Configure ChromaDB as the vector database provider + cognee.config.set_vector_db_config( + { + "vector_db_url": "http://localhost:8000", # Default ChromaDB server URL + "vector_db_key": "", # ChromaDB doesn't require an API key by default + "vector_db_provider": "chromadb", # Specify ChromaDB as provider + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "chromadb_example" + + # Add sample text to the dataset + sample_text = """ChromaDB is an open-source embedding database. + It allows users to store and query embeddings and their associated metadata. + ChromaDB can be deployed in various ways: in-memory, on disk via sqlite, or as a persistent service. + It is designed to be fast, scalable, and easy to use, making it a popular choice for AI applications. + The database is built to handle vector search efficiently, which is essential for semantic search applications. 
+ ChromaDB supports multiple distance metrics for vector similarity search and can be integrated with various ML frameworks.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "ChromaDB" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="ChromaDB") + print("\nInsights about ChromaDB:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "vector search" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name] + ) + print("\nChunks about vector search:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/falkordb_example.py b/examples/database_examples/falkordb_example.py new file mode 100644 index 000000000..9e096fd81 --- /dev/null +++ b/examples/database_examples/falkordb_example.py @@ -0,0 +1,87 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with FalkorDB + + This example: + 1. Configures Cognee to use FalkorDB as graph database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + # Configure FalkorDB as the graph database provider + cognee.config.set_graph_db_config( + { + "graph_database_url": "localhost", # FalkorDB URL (using Redis protocol) + "graph_database_port": 6379, + "graph_database_provider": "falkordb", + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "falkordb_example" + + # Add sample text to the dataset + sample_text = """FalkorDB is a graph database that evolved from RedisGraph. + It is focused on providing high-performance graph operations. + FalkorDB uses sparse adjacency matrices to represent the graph data structure. + It supports the Cypher query language for querying graph data. + FalkorDB can be integrated with vector search capabilities for AI applications. 
+ It provides a Redis module, allowing users to leverage Redis's features alongside graph capabilities.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "FalkorDB" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="FalkorDB") + print("\nInsights about FalkorDB:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "graph database" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name] + ) + print("\nChunks about graph database:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/kuzu_example.py b/examples/database_examples/kuzu_example.py new file mode 100644 index 000000000..a31404cbc --- /dev/null +++ b/examples/database_examples/kuzu_example.py @@ -0,0 +1,85 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with KuzuDB + + This example: + 1. Configures Cognee to use KuzuDB as graph database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + # Configure KuzuDB as the graph database provider + cognee.config.set_graph_db_config( + { + "graph_database_provider": "kuzu", # Specify KuzuDB as provider + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "kuzu_example" + + # Add sample text to the dataset + sample_text = """KuzuDB is a graph database system optimized for running complex graph analytics. + It is designed to be a high-performance graph database for data science workloads. + KuzuDB is built with modern hardware optimizations in mind. + It provides support for property graphs and offers a Cypher-like query language. + KuzuDB can handle both transactional and analytical graph workloads. + The database now includes vector search capabilities for AI applications and semantic search.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. 
Search for insights related to "KuzuDB"
+    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="KuzuDB")
+    print("\nInsights about KuzuDB:")
+    for result in insights_results:
+        print(f"- {result}")
+
+    # 2. Search for text chunks related to "graph database"
+    chunks_results = await cognee.search(
+        query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
+    )
+    print("\nChunks about graph database:")
+    for result in chunks_results:
+        print(f"- {result}")
+
+    # 3. Get graph completion related to databases
+    graph_completion_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
+    )
+    print("\nGraph completion for databases:")
+    for result in graph_completion_results:
+        print(f"- {result}")
+
+    # Clean up (optional)
+    # await cognee.prune.prune_data()
+    # await cognee.prune.prune_system(metadata=True)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/database_examples/milvus_example.py b/examples/database_examples/milvus_example.py
new file mode 100644
index 000000000..7a8b74c71
--- /dev/null
+++ b/examples/database_examples/milvus_example.py
@@ -0,0 +1,89 @@
+import os
+import pathlib
+import asyncio
+import cognee
+from cognee.modules.search.types import SearchType
+
+
+async def main():
+    """
+    Example script demonstrating how to use Cognee with Milvus
+
+    This example:
+    1. Configures Cognee to use Milvus as vector database
+    2. Sets up data directories
+    3. Adds sample data to Cognee
+    4. Processes (cognifies) the data
+    5. Performs different types of searches
+    """
+
+    # Set up data directories for storing documents and system files
+    # You should adjust these paths to your needs
+    current_dir = pathlib.Path(__file__).parent
+    data_directory_path = str(current_dir / "data_storage")
+    cognee.config.data_root_directory(data_directory_path)
+
+    cognee_directory_path = str(current_dir / "cognee_system")
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    local_milvus_db_path = os.path.join(cognee_directory_path, "databases", "milvus.db")
+
+    # Configure Milvus as the vector database provider
+    cognee.config.set_vector_db_config(
+        {
+            "vector_db_url": local_milvus_db_path,  # Set a Milvus server endpoint here if you have one
+            "vector_db_key": "",  # Set the Milvus token here, if required
+            "vector_db_provider": "milvus",  # Specify Milvus as provider
+        }
+    )
+    # Clean any existing data (optional)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # Create a dataset
+    dataset_name = "milvus_example"
+
+    # Add sample text to the dataset
+    sample_text = """Milvus is an open-source vector database built to power AI applications.
+    It is designed for storing, indexing, and querying large-scale vector datasets.
+    Milvus implements efficient approximate nearest neighbor search algorithms.
+    It features advanced indexing techniques like HNSW, IVF, PQ, and more.
+    Milvus supports hybrid searches combining vector similarity with scalar filtering.
+    The system can be deployed standalone, in clusters, or through a cloud service."""
+
+    # Add the sample text to the dataset
+    await cognee.add([sample_text], dataset_name)
+
+    # Process the added document to extract knowledge
+    await cognee.cognify([dataset_name])
+
+    # Now let's perform some searches
+    # 1. 
Search for insights related to "Milvus" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Milvus") + print("\nInsights about Milvus:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "vector similarity" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="vector similarity", datasets=[dataset_name] + ) + print("\nChunks about vector similarity:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/neo4j_example.py b/examples/database_examples/neo4j_example.py new file mode 100644 index 000000000..45985610f --- /dev/null +++ b/examples/database_examples/neo4j_example.py @@ -0,0 +1,94 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with Neo4j + + This example: + 1. Configures Cognee to use Neo4j as graph database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + + # Set up Neo4j credentials in .env file and get the values from environment variables + neo4j_url = os.getenv("GRAPH_DATABASE_URL") + neo4j_user = os.getenv("GRAPH_DATABASE_USERNAME") + neo4j_pass = os.getenv("GRAPH_DATABASE_PASSWORD") + + # Configure Neo4j as the graph database provider + cognee.config.set_graph_db_config( + { + "graph_database_url": neo4j_url, # Neo4j Bolt URL + "graph_database_provider": "neo4j", # Specify Neo4j as provider + "graph_database_username": neo4j_user, # Neo4j username + "graph_database_password": neo4j_pass, # Neo4j password + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "neo4j_example" + + # Add sample text to the dataset + sample_text = """Neo4j is a graph database management system. + It stores data in nodes and relationships rather than tables as in traditional relational databases. + Neo4j provides a powerful query language called Cypher for graph traversal and analysis. + It now supports vector indexing for similarity search with the vector index plugin. + Neo4j allows embedding generation and vector search to be combined with graph operations. 
+ Applications can use Neo4j to connect vector search with graph context for more meaningful results.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "Neo4j" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Neo4j") + print("\nInsights about Neo4j:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "graph database" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name] + ) + print("\nChunks about graph database:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/pgvector_example.py b/examples/database_examples/pgvector_example.py new file mode 100644 index 000000000..19dbb44e9 --- /dev/null +++ b/examples/database_examples/pgvector_example.py @@ -0,0 +1,99 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with PGVector + + This example: + 1. Configures Cognee to use PostgreSQL with PGVector extension as vector database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + # Configure PGVector as the vector database provider + cognee.config.set_vector_db_config( + { + "vector_db_provider": "pgvector", # Specify PGVector as provider + } + ) + + # Configure PostgreSQL connection details + # These settings are required for PGVector + cognee.config.set_relational_db_config( + { + "db_path": "", + "db_name": "cognee_db", + "db_host": "127.0.0.1", + "db_port": "5432", + "db_username": "cognee", + "db_password": "cognee", + "db_provider": "postgres", + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "pgvector_example" + + # Add sample text to the dataset + sample_text = """PGVector is an extension for PostgreSQL that adds vector similarity search capabilities. + It supports multiple indexing methods, including IVFFlat, HNSW, and brute-force search. + PGVector allows you to store vector embeddings directly in your PostgreSQL database. + It provides distance functions like L2 distance, inner product, and cosine distance. 
+ Using PGVector, you can perform both metadata filtering and vector similarity search in a single query. + The extension is often used for applications like semantic search, recommendations, and image similarity.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "PGVector" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="PGVector") + print("\nInsights about PGVector:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "vector similarity" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="vector similarity", datasets=[dataset_name] + ) + print("\nChunks about vector similarity:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/qdrant_example.py b/examples/database_examples/qdrant_example.py new file mode 100644 index 000000000..b2d2eef7d --- /dev/null +++ b/examples/database_examples/qdrant_example.py @@ -0,0 +1,93 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with Qdrant + + This example: + 1. Configures Cognee to use Qdrant as vector database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + + # Set up Qdrant credentials in .env file and get the values from environment variables + qdrant_url = os.getenv("VECTOR_DB_URL") + qdrant_key = os.getenv("VECTOR_DB_KEY") + + # Configure Qdrant as the vector database provider + + cognee.config.set_vector_db_config( + { + "vector_db_url": qdrant_url, # Enter Qdrant URL + "vector_db_key": qdrant_key, # API key needed + "vector_db_provider": "qdrant", # Specify Qdrant as provider + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "qdrant_example" + + # Add sample text to the dataset + sample_text = """Qdrant is a vector similarity search engine and vector database. + It provides a production-ready service with a convenient API for storing, searching, and managing vectors. + Qdrant supports filtering during vector search, which is essential for real-world applications. + The database implements various performance optimizations, including HNSW index for approximate nearest neighbor search. 
+ Qdrant can be deployed via Docker, as a managed cloud service, or directly on bare metal. + It also supports payload and metadata storage alongside the vectors, allowing for rich data retrieval.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "Qdrant" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Qdrant") + print("\nInsights about Qdrant:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "vector search" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name] + ) + print("\nChunks about vector search:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/database_examples/weaviate_example.py b/examples/database_examples/weaviate_example.py new file mode 100644 index 000000000..b14cc3c97 --- /dev/null +++ b/examples/database_examples/weaviate_example.py @@ -0,0 +1,92 @@ +import os +import pathlib +import asyncio +import cognee +from cognee.modules.search.types import SearchType + + +async def main(): + """ + Example script demonstrating how to use Cognee with Weaviate + + This example: + 1. Configures Cognee to use Weaviate as vector database + 2. Sets up data directories + 3. Adds sample data to Cognee + 4. Processes (cognifies) the data + 5. Performs different types of searches + """ + + # Set up Weaviate credentials in .env file and get the values from environment variables + weaviate_url = os.getenv("VECTOR_DB_URL") + weaviate_key = os.getenv("VECTOR_DB_KEY") + + # Configure Weaviate as the vector database provider + cognee.config.set_vector_db_config( + { + "vector_db_url": weaviate_url, # Set your Weaviate Endpoint + "vector_db_key": weaviate_key, # Set your Weaviate API key + "vector_db_provider": "weaviate", # Specify Weaviate as provider + } + ) + + # Set up data directories for storing documents and system files + # You should adjust these paths to your needs + current_dir = pathlib.Path(__file__).parent + data_directory_path = str(current_dir / "data_storage") + cognee.config.data_root_directory(data_directory_path) + + cognee_directory_path = str(current_dir / "cognee_system") + cognee.config.system_root_directory(cognee_directory_path) + + # Clean any existing data (optional) + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # Create a dataset + dataset_name = "weaviate_example" + + # Add sample text to the dataset + sample_text = """Weaviate is an open-source vector database that stores both objects and vectors. + It enables vector search with GraphQL-based filtering capabilities. + Weaviate can be deployed in the cloud, on-premise, or embedded in your application. + It allows users to search through vectors using different algorithms and metrics. 
+ Weaviate supports various modules for text2vec transformations, including BERT, OpenAI, and other models. + It can index data in multiple ways and offers features like semantic search, classification, and contextualization.""" + + # Add the sample text to the dataset + await cognee.add([sample_text], dataset_name) + + # Process the added document to extract knowledge + await cognee.cognify([dataset_name]) + + # Now let's perform some searches + # 1. Search for insights related to "Weaviate" + insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Weaviate") + print("\nInsights about Weaviate:") + for result in insights_results: + print(f"- {result}") + + # 2. Search for text chunks related to "vector search" + chunks_results = await cognee.search( + query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name] + ) + print("\nChunks about vector search:") + for result in chunks_results: + print(f"- {result}") + + # 3. Get graph completion related to databases + graph_completion_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="database" + ) + print("\nGraph completion for databases:") + for result in graph_completion_results: + print(f"- {result}") + + # Clean up (optional) + # await cognee.prune.prune_data() + # await cognee.prune.prune_system(metadata=True) + + +if __name__ == "__main__": + asyncio.run(main())
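
A note for callers: `db_examples_tests.yml` is a reusable workflow, so suites other than `test_suites.yml` can run a subset of the database examples through the `databases` input (each job guards itself with `contains(inputs.databases, '<name>')`). Below is a minimal caller sketch; the workflow file name and the schedule trigger are hypothetical, while the `uses:`, `with:`, and `secrets: inherit` wiring follows the definitions in this patch:

```yaml
# .github/workflows/nightly_db_examples.yml (hypothetical caller)
name: Nightly DB Examples

on:
  schedule:
    - cron: "0 3 * * *"  # hypothetical nightly schedule

jobs:
  db-examples:
    # Runs only the Neo4j and Qdrant example jobs; the other jobs are
    # skipped by their `if:` guards because their names are not in the list.
    uses: ./.github/workflows/db_examples_tests.yml
    with:
      databases: "neo4j,qdrant"
      python-version: "3.11.x"
    secrets: inherit
```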