feat: Test db examples (#817)


## Description
Adds a reusable GitHub Actions workflow that runs the new database example scripts (Neo4j, Kuzu, Milvus, Weaviate, Qdrant, and PostgreSQL/PGVector) against their backends, and wires it into the main test suite. Also adds the example scripts under `examples/database_examples/`, makes the Milvus and Qdrant adapters return empty results instead of raising when a collection is missing, fixes a missing `await` on the Weaviate `exists` check, and replaces the `limit=0` sentinel in brute-force retrieval with an explicit `limit=50`.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris <boris@topoteretes.com>
Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Authored by Hande on 2025-05-16 09:30:47 +02:00, committed by GitHub. Commit 3b07f3c08d (parent 1dd179b6dd).
15 changed files with 1039 additions and 9 deletions

.github/workflows/db_examples_tests.yml (new file)

@@ -0,0 +1,269 @@
name: Reusable DB Examples Tests

on:
  workflow_call:
    inputs:
      databases:
        required: false
        type: string
        default: "all"
        description: "Which databases to run (comma-separated or 'all')"
      python-version:
        required: false
        type: string
        default: "3.11.x"
    secrets:
      LLM_MODEL:
        required: true
      LLM_ENDPOINT:
        required: true
      LLM_API_KEY:
        required: true
      LLM_API_VERSION:
        required: true
      EMBEDDING_MODEL:
        required: true
      EMBEDDING_ENDPOINT:
        required: true
      EMBEDDING_API_KEY:
        required: true
      EMBEDDING_API_VERSION:
        required: true
      QDRANT_API_URL:
        required: false
      QDRANT_API_KEY:
        required: false
      WEAVIATE_API_URL:
        required: false
      WEAVIATE_API_KEY:
        required: false
      POSTGRES_PASSWORD:
        required: false
      NEO4J_API_URL:
        required: false
      NEO4J_API_KEY:
        required: false

jobs:
  run-db-example-neo4j:
    name: "Neo4j DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j') }}
    steps:
      - name: Check out
        uses: actions/checkout@master
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install Neo4j extra
        run: |
          poetry install -E neo4j
      - name: Run Neo4j Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          GRAPH_DATABASE_PROVIDER: "neo4j"
          GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
          GRAPH_DATABASE_USERNAME: "neo4j"
          GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
        run: |
          poetry run python examples/database_examples/neo4j_example.py

  run-db-example-kuzu:
    name: "Kuzu DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu') }}
    steps:
      - name: Check out
        uses: actions/checkout@v4
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install Kuzu extra
        run: |
          poetry install -E kuzu
      - name: Run Kuzu Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          GRAPH_DATABASE_PROVIDER: "kuzu"
        run: |
          poetry run python examples/database_examples/kuzu_example.py

  run-db-example-milvus:
    name: "Milvus DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'milvus') }}
    steps:
      - name: Check out
        uses: actions/checkout@v4
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install Milvus extra
        run: |
          poetry install -E milvus
      - name: Run Milvus Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: |
          poetry run python examples/database_examples/milvus_example.py

  run-db-example-weaviate:
    name: "Weaviate DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'weaviate') }}
    steps:
      - name: Check out
        uses: actions/checkout@v4
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install Weaviate extra
        run: |
          poetry install -E weaviate
      - name: Run Weaviate Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }}
          VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }}
        run: |
          poetry run python examples/database_examples/weaviate_example.py

  run-db-example-qdrant:
    name: "Qdrant DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'qdrant') }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Check out
        uses: actions/checkout@master
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install Qdrant extra
        run: |
          poetry install -E qdrant
      - name: Run Qdrant Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
          VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
        run: |
          poetry run python examples/database_examples/qdrant_example.py

  run-db-example-pgvector:
    name: "PostgreSQL PGVector DB Example Test"
    runs-on: ubuntu-22.04
    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'postgres') }}
    services:
      postgres:
        image: pgvector/pgvector:pg17
        env:
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_DB: cognee_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
      - name: Install PGVector extra
        run: |
          poetry install -E postgres
      - name: Run PGVector Example
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: |
          poetry run python examples/database_examples/pgvector_example.py


@@ -77,6 +77,12 @@ jobs:
     uses: ./.github/workflows/examples_tests.yml
     secrets: inherit
+  db-examples-tests:
+    name: DB Examples Tests
+    needs: [vector-db-tests]
+    uses: ./.github/workflows/db_examples_tests.yml
+    secrets: inherit
   # Additional LLM tests
   gemini-tests:
     name: Gemini Tests
@@ -113,6 +119,7 @@
       python-version-tests,
       vector-db-tests,
       example-tests,
+      db-examples-tests,
       gemini-tests,
       ollama-tests,
       relational-db-migration-tests,
@@ -131,6 +138,7 @@
          "${{ needs.python-version-tests.result }}" == "success" &&
          "${{ needs.vector-db-tests.result }}" == "success" &&
          "${{ needs.example-tests.result }}" == "success" &&
+         "${{ needs.db-examples-tests.result }}" == "success" &&
          "${{ needs.relational-db-migration-tests.result }}" == "success" &&
          "${{ needs.gemini-tests.result }}" == "success" &&
          "${{ needs.docker-compose-test.result }}" == "success" &&


@@ -155,7 +155,7 @@ class config:
            if hasattr(graph_db_config, key):
                object.__setattr__(graph_db_config, key, value)
            else:
-               raise AttributeError(message=f"'{key}' is not a valid attribute of the config.")
+               raise AttributeError(f"'{key}' is not a valid attribute of the config.")

    @staticmethod
    def set_vector_db_config(config_dict: dict):
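Worth spelling out why this change matters: `AttributeError` accepts positional message arguments only, so the old `message=` keyword itself raised a `TypeError` whenever this error path was hit. A minimal sketch of the fixed behavior (the invalid key name below is made up for illustration):

```python
import cognee

# Hypothetical invalid key -- any name that is not a real config attribute.
try:
    cognee.config.set_graph_db_config({"graph_database_flavor": "neo4j"})
except AttributeError as err:
    print(err)  # 'graph_database_flavor' is not a valid attribute of the config.
```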


@@ -178,10 +178,18 @@ class MilvusAdapter(VectorDBInterface):
    ):
        from pymilvus import MilvusException, exceptions

+       if limit <= 0:
+           return []
+
        client = self.get_milvus_client()
        if query_text is None and query_vector is None:
            raise ValueError("One of query_text or query_vector must be provided!")
+
+       if not client.has_collection(collection_name=collection_name):
+           logger.warning(
+               f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []."
+           )
+           return []

        try:
            query_vector = query_vector or (await self.embed_data([query_text]))[0]
@@ -208,12 +216,19 @@ class MilvusAdapter(VectorDBInterface):
                )
                for result in results[0]
            ]
-       except exceptions.CollectionNotExistException as error:
-           raise CollectionNotFoundError(
-               f"Collection '{collection_name}' does not exist!"
-           ) from error
+       except exceptions.CollectionNotExistException:
+           logger.warning(
+               f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []."
+           )
+           return []
        except MilvusException as e:
-           logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
+           # Catch other Milvus errors that are "collection not found" (paranoid safety)
+           if "collection not found" in str(e).lower() or "schema" in str(e).lower():
+               logger.warning(
+                   f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []."
+               )
+               return []
+           logger.error(f"Error searching Milvus collection '{collection_name}': {e}")
            raise e

    async def batch_search(
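The net effect: a search against a missing collection now degrades to an empty result set instead of raising. A minimal sketch, assuming cognee's `get_vector_engine` accessor import path and a deliberately absent collection name:

```python
import asyncio

# Assumed import path for the configured vector engine accessor.
from cognee.infrastructure.databases.vector import get_vector_engine


async def demo():
    engine = get_vector_engine()
    # With this change, a missing collection logs a warning and yields []
    # instead of raising CollectionNotFoundError.
    results = await engine.search(
        collection_name="no_such_collection",  # hypothetical, deliberately absent
        query_text="anything",
        limit=5,
    )
    print(results)  # -> []


asyncio.run(demo())
```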


@@ -159,12 +159,24 @@ class QDrantAdapter(VectorDBInterface):
        query_vector: Optional[List[float]] = None,
        limit: int = 15,
        with_vector: bool = False,
-   ):
+   ) -> List[ScoredResult]:
+       from qdrant_client.http.exceptions import UnexpectedResponse
+
        if query_text is None and query_vector is None:
            raise InvalidValueError(message="One of query_text or query_vector must be provided!")
+
+       if limit <= 0:
+           return []
+
+       if not await self.has_collection(collection_name):
+           logger.warning(
+               f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []."
+           )
+           return []

        if query_vector is None:
            query_vector = (await self.embed_data([query_text]))[0]

        try:
            client = self.get_qdrant_client()


@@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface):
        # )
        else:
            data_point: DataObject = data_points[0]
-           if collection.data.exists(data_point.uuid):
+           if await collection.data.exists(data_point.uuid):
                return await collection.data.update(
                    uuid=data_point.uuid,
                    vector=data_point.vector,
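The missing `await` here was not cosmetic: with the async Weaviate client, `collection.data.exists(...)` returns a coroutine, and a coroutine object is always truthy, so the un-awaited call made the update branch unconditional. A standalone illustration of the pitfall (no Weaviate needed):

```python
import asyncio


async def exists() -> bool:
    return False  # stand-in for collection.data.exists(...)


async def main():
    # True: this is a coroutine object, not the result
    # (it also triggers a "coroutine was never awaited" RuntimeWarning).
    print(bool(exists()))
    # False: the actual answer.
    print(bool(await exists()))


asyncio.run(main())
```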


@@ -146,7 +146,7 @@ async def brute_force_search(
    async def search_in_collection(collection_name: str):
        try:
            return await vector_engine.search(
-               collection_name=collection_name, query_text=query, limit=0
+               collection_name=collection_name, query_text=query, limit=50
            )
        except CollectionNotFoundError:
            return []
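This pairs with the new adapter guards: `limit <= 0` now short-circuits to an empty list, so `limit=0` can no longer serve as an "unlimited" sentinel and call sites must request an explicit positive count. Roughly (engine accessor as above; the collection name is hypothetical):

```python
import asyncio

from cognee.infrastructure.databases.vector import get_vector_engine  # assumed path


async def demo():
    engine = get_vector_engine()
    # The new guards return [] for non-positive limits...
    assert await engine.search(collection_name="chunks", query_text="q", limit=0) == []
    # ...so brute-force search now asks for up to 50 hits explicitly.
    results = await engine.search(collection_name="chunks", query_text="q", limit=50)
    print(len(results))


asyncio.run(demo())
```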


@@ -0,0 +1,87 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with ChromaDB

    This example:
    1. Configures Cognee to use ChromaDB as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Configure ChromaDB as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": "http://localhost:8000",  # Default ChromaDB server URL
            "vector_db_key": "",  # ChromaDB doesn't require an API key by default
            "vector_db_provider": "chromadb",  # Specify ChromaDB as provider
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "chromadb_example"

    # Add sample text to the dataset
    sample_text = """ChromaDB is an open-source embedding database.
    It allows users to store and query embeddings and their associated metadata.
    ChromaDB can be deployed in various ways: in-memory, on disk via sqlite, or as a persistent service.
    It is designed to be fast, scalable, and easy to use, making it a popular choice for AI applications.
    The database is built to handle vector search efficiently, which is essential for semantic search applications.
    ChromaDB supports multiple distance metrics for vector similarity search and can be integrated with various ML frameworks."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "ChromaDB"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="ChromaDB")
    print("\nInsights about ChromaDB:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector search"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name]
    )
    print("\nChunks about vector search:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
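Before running this script, it can help to confirm the assumed local ChromaDB server is actually reachable; `chromadb.HttpClient` and `heartbeat()` are part of the chromadb client package, while the host and port simply mirror the URL above:

```python
import chromadb

# Quick pre-flight check against the server the example points at.
client = chromadb.HttpClient(host="localhost", port=8000)
print(client.heartbeat())  # returns a nanosecond timestamp when the server is up
```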


@@ -0,0 +1,87 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with FalkorDB

    This example:
    1. Configures Cognee to use FalkorDB as graph database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Configure FalkorDB as the graph database provider
    cognee.config.set_graph_db_config(
        {
            "graph_database_url": "localhost",  # FalkorDB URL (using Redis protocol)
            "graph_database_port": 6379,
            "graph_database_provider": "falkordb",
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "falkordb_example"

    # Add sample text to the dataset
    sample_text = """FalkorDB is a graph database that evolved from RedisGraph.
    It is focused on providing high-performance graph operations.
    FalkorDB uses sparse adjacency matrices to represent the graph data structure.
    It supports the Cypher query language for querying graph data.
    FalkorDB can be integrated with vector search capabilities for AI applications.
    It provides a Redis module, allowing users to leverage Redis's features alongside graph capabilities."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "FalkorDB"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="FalkorDB")
    print("\nInsights about FalkorDB:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "graph database"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
    )
    print("\nChunks about graph database:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
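FalkorDB speaks the Redis protocol, so a plain redis-py ping is enough to confirm the server this example expects on localhost:6379 is up (redis-py is an assumption here; the falkordb client package would work equally well):

```python
import redis

# Pre-flight check for the FalkorDB instance assumed above.
r = redis.Redis(host="localhost", port=6379)
print(r.ping())  # True when FalkorDB is reachable
```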


@@ -0,0 +1,85 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with KuzuDB

    This example:
    1. Configures Cognee to use KuzuDB as graph database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Configure KuzuDB as the graph database provider
    cognee.config.set_graph_db_config(
        {
            "graph_database_provider": "kuzu",  # Specify KuzuDB as provider
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "kuzu_example"

    # Add sample text to the dataset
    sample_text = """KuzuDB is a graph database system optimized for running complex graph analytics.
    It is designed to be a high-performance graph database for data science workloads.
    KuzuDB is built with modern hardware optimizations in mind.
    It provides support for property graphs and offers a Cypher-like query language.
    KuzuDB can handle both transactional and analytical graph workloads.
    The database now includes vector search capabilities for AI applications and semantic search."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "KuzuDB"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="KuzuDB")
    print("\nInsights about KuzuDB:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "graph database"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
    )
    print("\nChunks about graph database:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
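Kuzu is embedded, so no server or URL is needed; a short smoke test with the kuzu package and a throwaway database path verifies the install before running the example (`kuzu.Database`, `kuzu.Connection`, and `QueryResult.get_next` are the embedded Python API):

```python
import kuzu

# Throwaway on-disk database; the path is arbitrary.
db = kuzu.Database("./kuzu_smoke_test_db")
conn = kuzu.Connection(db)
print(conn.execute("RETURN 1;").get_next())  # -> [1]
```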


@@ -0,0 +1,89 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with Milvus

    This example:
    1. Configures Cognee to use Milvus as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    local_milvus_db_path = os.path.join(cognee_directory_path, "databases", "milvus.db")

    # Configure Milvus as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": local_milvus_db_path,  # Or enter a remote Milvus endpoint if one exists
            "vector_db_key": "",  # Enter a token if required
            "vector_db_provider": "milvus",  # Specify Milvus as provider
        }
    )

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "milvus_example"

    # Add sample text to the dataset
    sample_text = """Milvus is an open-source vector database built to power AI applications.
    It is designed for storing, indexing, and querying large-scale vector datasets.
    Milvus implements efficient approximate nearest neighbor search algorithms.
    It features advanced indexing techniques like HNSW, IVF, PQ, and more.
    Milvus supports hybrid searches combining vector similarity with scalar filtering.
    The system can be deployed standalone, in clusters, or through a cloud service."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Milvus"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Milvus")
    print("\nInsights about Milvus:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector similarity"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector similarity", datasets=[dataset_name]
    )
    print("\nChunks about vector similarity:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
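The URL above is a local file path, which pymilvus treats as an embedded Milvus Lite database (this requires the milvus-lite dependency). A quick standalone check with `MilvusClient`, a real pymilvus class; the path here is a throwaway:

```python
from pymilvus import MilvusClient

# Opening a .db path starts an embedded Milvus Lite instance.
client = MilvusClient("./milvus_smoke_test.db")
print(client.list_collections())  # [] on a fresh database
```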


@@ -0,0 +1,94 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with Neo4j

    This example:
    1. Configures Cognee to use Neo4j as graph database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Set up Neo4j credentials in .env file and get the values from environment variables
    neo4j_url = os.getenv("GRAPH_DATABASE_URL")
    neo4j_user = os.getenv("GRAPH_DATABASE_USERNAME")
    neo4j_pass = os.getenv("GRAPH_DATABASE_PASSWORD")

    # Configure Neo4j as the graph database provider
    cognee.config.set_graph_db_config(
        {
            "graph_database_url": neo4j_url,  # Neo4j Bolt URL
            "graph_database_provider": "neo4j",  # Specify Neo4j as provider
            "graph_database_username": neo4j_user,  # Neo4j username
            "graph_database_password": neo4j_pass,  # Neo4j password
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "neo4j_example"

    # Add sample text to the dataset
    sample_text = """Neo4j is a graph database management system.
    It stores data in nodes and relationships rather than tables as in traditional relational databases.
    Neo4j provides a powerful query language called Cypher for graph traversal and analysis.
    It now supports vector indexing for similarity search with the vector index plugin.
    Neo4j allows embedding generation and vector search to be combined with graph operations.
    Applications can use Neo4j to connect vector search with graph context for more meaningful results."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Neo4j"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Neo4j")
    print("\nInsights about Neo4j:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "graph database"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
    )
    print("\nChunks about graph database:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
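A pre-flight connectivity check against the same environment variables the script reads can save a long cognify run that fails late; `GraphDatabase.driver` and `verify_connectivity` are standard neo4j-driver APIs, and the fallback URI here is just an illustration:

```python
import os

from neo4j import GraphDatabase

driver = GraphDatabase.driver(
    os.getenv("GRAPH_DATABASE_URL", "bolt://localhost:7687"),  # fallback is illustrative
    auth=(
        os.getenv("GRAPH_DATABASE_USERNAME", "neo4j"),
        os.getenv("GRAPH_DATABASE_PASSWORD", ""),
    ),
)
driver.verify_connectivity()  # raises if the server is unreachable or auth fails
driver.close()
```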


@@ -0,0 +1,99 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with PGVector

    This example:
    1. Configures Cognee to use PostgreSQL with PGVector extension as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Configure PGVector as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_provider": "pgvector",  # Specify PGVector as provider
        }
    )

    # Configure PostgreSQL connection details
    # These settings are required for PGVector
    cognee.config.set_relational_db_config(
        {
            "db_path": "",
            "db_name": "cognee_db",
            "db_host": "127.0.0.1",
            "db_port": "5432",
            "db_username": "cognee",
            "db_password": "cognee",
            "db_provider": "postgres",
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "pgvector_example"

    # Add sample text to the dataset
    sample_text = """PGVector is an extension for PostgreSQL that adds vector similarity search capabilities.
    It supports multiple indexing methods, including IVFFlat, HNSW, and brute-force search.
    PGVector allows you to store vector embeddings directly in your PostgreSQL database.
    It provides distance functions like L2 distance, inner product, and cosine distance.
    Using PGVector, you can perform both metadata filtering and vector similarity search in a single query.
    The extension is often used for applications like semantic search, recommendations, and image similarity."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "PGVector"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="PGVector")
    print("\nInsights about PGVector:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector similarity"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector similarity", datasets=[dataset_name]
    )
    print("\nChunks about vector similarity:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
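Since the example assumes the pgvector extension is available in `cognee_db`, a small check against the same connection settings can confirm that up front; asyncpg is an assumption here, and any PostgreSQL driver would do:

```python
import asyncio

import asyncpg


async def check():
    conn = await asyncpg.connect(
        user="cognee", password="cognee", database="cognee_db", host="127.0.0.1", port=5432
    )
    # pg_extension lists installed extensions; 'vector' is pgvector's name.
    row = await conn.fetchrow("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
    print("pgvector:", row["extversion"] if row else "not installed")
    await conn.close()


asyncio.run(check())
```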


@@ -0,0 +1,93 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with Qdrant

    This example:
    1. Configures Cognee to use Qdrant as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Set up Qdrant credentials in .env file and get the values from environment variables
    qdrant_url = os.getenv("VECTOR_DB_URL")
    qdrant_key = os.getenv("VECTOR_DB_KEY")

    # Configure Qdrant as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": qdrant_url,  # Enter Qdrant URL
            "vector_db_key": qdrant_key,  # API key needed
            "vector_db_provider": "qdrant",  # Specify Qdrant as provider
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "qdrant_example"

    # Add sample text to the dataset
    sample_text = """Qdrant is a vector similarity search engine and vector database.
    It provides a production-ready service with a convenient API for storing, searching, and managing vectors.
    Qdrant supports filtering during vector search, which is essential for real-world applications.
    The database implements various performance optimizations, including HNSW index for approximate nearest neighbor search.
    Qdrant can be deployed via Docker, as a managed cloud service, or directly on bare metal.
    It also supports payload and metadata storage alongside the vectors, allowing for rich data retrieval."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Qdrant"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Qdrant")
    print("\nInsights about Qdrant:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector search"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name]
    )
    print("\nChunks about vector search:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
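As with the other hosted backends, a quick reachability check with the qdrant-client package, reading the same env vars, is cheap insurance:

```python
import os

from qdrant_client import QdrantClient

client = QdrantClient(url=os.getenv("VECTOR_DB_URL"), api_key=os.getenv("VECTOR_DB_KEY"))
print(client.get_collections())  # lists existing collections if the instance is reachable
```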


@@ -0,0 +1,92 @@
import os
import pathlib
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with Weaviate

    This example:
    1. Configures Cognee to use Weaviate as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Set up Weaviate credentials in .env file and get the values from environment variables
    weaviate_url = os.getenv("VECTOR_DB_URL")
    weaviate_key = os.getenv("VECTOR_DB_KEY")

    # Configure Weaviate as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": weaviate_url,  # Set your Weaviate Endpoint
            "vector_db_key": weaviate_key,  # Set your Weaviate API key
            "vector_db_provider": "weaviate",  # Specify Weaviate as provider
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "weaviate_example"

    # Add sample text to the dataset
    sample_text = """Weaviate is an open-source vector database that stores both objects and vectors.
    It enables vector search with GraphQL-based filtering capabilities.
    Weaviate can be deployed in the cloud, on-premise, or embedded in your application.
    It allows users to search through vectors using different algorithms and metrics.
    Weaviate supports various modules for text2vec transformations, including BERT, OpenAI, and other models.
    It can index data in multiple ways and offers features like semantic search, classification, and contextualization."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Weaviate"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Weaviate")
    print("\nInsights about Weaviate:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector search"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name]
    )
    print("\nChunks about vector search:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
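And a matching pre-flight check for Weaviate. This sketch assumes the weaviate-client v4 API (`connect_to_weaviate_cloud`, `is_ready`) and a cloud-hosted cluster; a self-hosted instance would use `connect_to_local` instead:

```python
import os

import weaviate
from weaviate.auth import AuthApiKey

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=os.getenv("VECTOR_DB_URL"),
    auth_credentials=AuthApiKey(os.getenv("VECTOR_DB_KEY")),
)
print(client.is_ready())  # True when the cluster answers
client.close()
```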