<!-- .github/pull_request_template.md -->
## Description
Adds a Neptune Analytics 'hybrid' integration to the Cognee.ai memory
storage layer. With the following configuration, Cognee uses Amazon
Neptune Analytics to store all nodes, edges, and vector embeddings for
the Cognee.ai memory conversation:
```python
cognee.config.set_graph_db_config(
    {
        "graph_database_provider": "neptune_analytics",  # Specify Neptune Analytics as provider
        "graph_database_url": graph_endpoint_url,  # Neptune Analytics endpoint with the format neptune-graph://<GRAPH_ID>
    }
)
cognee.config.set_vector_db_config(
    {
        "vector_db_provider": "neptune_analytics",  # Specify Neptune Analytics as provider
        "vector_db_url": graph_endpoint_url,  # Neptune Analytics endpoint with the format neptune-graph://<GRAPH_ID>
    }
)
```
For a complete, runnable example, see
[neptune_analytics_example.py](08a3a1d2a8/examples/database_examples/neptune_analytics_example.py).
Related: https://github.com/topoteretes/cognee-starter/pull/11
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
---------
Signed-off-by: Andrew Carbonetto <andrew.carbonetto@improving.com>
Signed-off-by: Andy Kwok <andy.kwok@improving.com>
Co-authored-by: Andy Kwok <andy.kwok@improving.com>
Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
107 lines
4.2 KiB
Python
107 lines
4.2 KiB
Python
import base64
|
|
import json
|
|
import os
|
|
import pathlib
|
|
import asyncio
|
|
import cognee
|
|
from cognee.modules.search.types import SearchType
|
|
from dotenv import load_dotenv
|
|
|
|
# Load variables from the .env file so that main() can read them with os.getenv.
load_dotenv()
|
|
|
|
# Sample documents ingested by main(); kept at module level so the multi-line
# text is not mixed with function-body indentation.
_SAMPLE_TEXT_1 = """Neptune Analytics is a memory-optimized graph database engine for analytics. With Neptune
Analytics, you can get insights and find trends by processing large amounts of graph data in seconds. To analyze
graph data quickly and easily, Neptune Analytics stores large graph datasets in memory. It supports a library of
optimized graph analytic algorithms, low-latency graph queries, and vector search capabilities within graph
traversals.
"""

_SAMPLE_TEXT_2 = """Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads
that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It
complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load
the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's
stored in Amazon S3.
"""


def _neptune_endpoint_from_env():
    """Return the ``neptune-graph://<GRAPH_ID>`` endpoint built from the environment.

    Raises:
        ValueError: If the GRAPH_ID environment variable is unset or blank.
    """
    graph_id = os.getenv("GRAPH_ID", "").strip()
    if not graph_id:
        # Fail fast: without an ID the endpoint would be the malformed
        # "neptune-graph://", which only surfaces later as an obscure error.
        raise ValueError(
            "GRAPH_ID environment variable is not set; it is required to build "
            "the Neptune Analytics endpoint neptune-graph://<GRAPH_ID>."
        )
    return "neptune-graph://" + graph_id


def _print_results(title, results):
    """Print a banner containing *title* followed by one bullet line per result."""
    print(f"\n========{title}========:")
    for result in results:
        print(f"- {result}")


async def main():
    """
    Example script demonstrating how to use Cognee with Amazon Neptune Analytics

    This example:
    1. Configures Cognee to use Neptune Analytics as graph and vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes/cognifies the data
    5. Performs different types of searches
    6. Prunes the data and system state again

    Raises:
        ValueError: If the GRAPH_ID environment variable is unset or blank.
    """
    # Set up Amazon credentials in .env file and get the values from environment variables
    graph_endpoint_url = _neptune_endpoint_from_env()

    # Configure Neptune Analytics as the graph & vector database provider;
    # both stores point at the same endpoint.
    cognee.config.set_graph_db_config(
        {
            "graph_database_provider": "neptune_analytics",  # Specify Neptune Analytics as provider
            "graph_database_url": graph_endpoint_url,  # Format: neptune-graph://<GRAPH_ID>
        }
    )
    cognee.config.set_vector_db_config(
        {
            "vector_db_provider": "neptune_analytics",  # Specify Neptune Analytics as provider
            "vector_db_url": graph_endpoint_url,  # Format: neptune-graph://<GRAPH_ID>
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    cognee.config.data_root_directory(str(current_dir / "data_storage"))
    cognee.config.system_root_directory(str(current_dir / "cognee_system"))

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset and add the sample text to it
    dataset_name = "neptune_example"
    await cognee.add([_SAMPLE_TEXT_1, _SAMPLE_TEXT_2], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Neptune Analytics"
    insights_results = await cognee.search(
        query_type=SearchType.INSIGHTS, query_text="Neptune Analytics"
    )
    _print_results("Insights about Neptune Analytics", insights_results)

    # 2. Search for text chunks related to "graph database"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
    )
    _print_results("Chunks about graph database", chunks_results)

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    _print_results("Graph completion for databases", graph_completion_results)

    # Clean up (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async example to completion.
    asyncio.run(main())
|