refactor: Add better handling of configuration for dataset to database handler
This commit is contained in:
parent
64a3ee96c4
commit
593f17fcdc
6 changed files with 71 additions and 28 deletions
|
|
@ -93,6 +93,8 @@ DB_NAME=cognee_db
|
|||
|
||||
# Default (local file-based)
|
||||
GRAPH_DATABASE_PROVIDER="kuzu"
|
||||
# Handler for multi-user access control mode: it determines how separate DBs are mapped/created per Cognee dataset
|
||||
GRAPH_DATASET_DATABASE_HANDLER="kuzu"
|
||||
|
||||
# -- To switch to Remote Kuzu uncomment and fill these: -------------------------------------------------------------
|
||||
#GRAPH_DATABASE_PROVIDER="kuzu"
|
||||
|
|
@ -117,6 +119,8 @@ VECTOR_DB_PROVIDER="lancedb"
|
|||
# Not needed if a cloud vector database is not used
|
||||
VECTOR_DB_URL=
|
||||
VECTOR_DB_KEY=
|
||||
# Handler for multi-user access control mode: it determines how separate DBs are mapped/created per Cognee dataset
|
||||
VECTOR_DATASET_DATABASE_HANDLER="lancedb"
|
||||
|
||||
################################################################################
|
||||
# 🧩 Ontology resolver settings
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ from typing import Union
|
|||
from uuid import UUID
|
||||
|
||||
from cognee.base_config import get_base_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_context_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
|
||||
from cognee.infrastructure.files.storage.config import file_storage_config
|
||||
from cognee.modules.users.methods import get_user
|
||||
|
|
@ -16,23 +16,59 @@ vector_db_config = ContextVar("vector_db_config", default=None)
|
|||
graph_db_config = ContextVar("graph_db_config", default=None)
|
||||
session_user = ContextVar("session_user", default=None)
|
||||
|
||||
VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
|
||||
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor", "neo4j"]
|
||||
|
||||
|
||||
async def set_session_user_context_variable(user):
    """Store ``user`` in the module-level ``session_user`` context variable.

    The coroutine awaits nothing; it only records the value. The token
    returned by ``ContextVar.set`` is discarded, so the previous value cannot
    be restored through this function.
    """
    session_user.set(user)
    return None
|
||||
|
||||
|
||||
def _unsupported_handler_message(kind, handler, supported_handlers):
    """Build the error text for a configured handler name that is not registered."""
    return (
        f"Unsupported {kind} dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported {kind} dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
        f"Selected {kind} dataset to database handler: {handler}\n"
        f"Supported dataset to database handlers: {list(supported_handlers.keys())}\n"
    )


def _provider_mismatch_message(kind, provider, handler, supported_handlers):
    """Build the error text for a handler that does not match the configured provider."""
    return (
        f"The selected {kind} dataset to database handler does not work with the configured {kind} database provider. Cannot add support for multi-user access control mode. Please use a supported {kind} dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
        f"Selected {kind} database provider: {provider}\n"
        f"Selected {kind} dataset to database handler: {handler}\n"
        f"Supported dataset to database handlers: {list(supported_handlers.keys())}\n"
    )


def multi_user_support_possible():
    """Validate the dataset-to-database handler configuration for multi-user mode.

    Reads the graph and vector database configs and checks, in this order:

    1. the graph handler name is a registered dataset database handler,
    2. the vector handler name is a registered dataset database handler,
    3. the graph handler's ``handler_provider`` equals the configured graph provider,
    4. the vector handler's ``handler_provider`` equals the configured vector provider.

    Returns:
        ``True`` when every check passes. The function never returns ``False``;
        a misconfiguration is reported by raising instead.

    Raises:
        EnvironmentError: on the first failing check, with a message naming the
            selected handler (and provider, for a mismatch) and the registered
            handler names.
    """
    # Local names deliberately differ from the module-level ``graph_db_config`` /
    # ``vector_db_config`` context variables so they are not shadowed here.
    graph_config = get_graph_config()
    vector_config = get_vectordb_config()

    # Imported inside the function, as the original code did
    # (presumably to avoid a circular import - TODO confirm).
    from cognee.infrastructure.databases.dataset_database_handler import (
        supported_dataset_database_handlers,
    )

    # (kind used in messages, configured handler name, configured provider)
    selections = (
        (
            "graph",
            graph_config.graph_dataset_database_handler,
            graph_config.graph_database_provider,
        ),
        (
            "vector",
            vector_config.vector_dataset_database_handler,
            vector_config.vector_db_provider,
        ),
    )

    # First pass: both handler names must be registered before any provider
    # comparison, so the lookups in the second pass cannot raise KeyError.
    for kind, handler, _provider in selections:
        if handler not in supported_dataset_database_handlers:
            raise EnvironmentError(
                _unsupported_handler_message(kind, handler, supported_dataset_database_handlers)
            )

    # Second pass: each handler must be written for the configured provider.
    for kind, handler, provider in selections:
        if supported_dataset_database_handlers[handler]["handler_provider"] != provider:
            raise EnvironmentError(
                _provider_mismatch_message(
                    kind, provider, handler, supported_dataset_database_handlers
                )
            )

    return True
|
||||
|
||||
|
||||
def backend_access_control_enabled():
|
||||
backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
|
||||
|
|
@ -42,12 +78,7 @@ def backend_access_control_enabled():
|
|||
return multi_user_support_possible()
|
||||
elif backend_access_control.lower() == "true":
|
||||
# If enabled, ensure that the current graph and vector DBs can support it
|
||||
multi_user_support = multi_user_support_possible()
|
||||
if not multi_user_support:
|
||||
raise EnvironmentError(
|
||||
"ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
|
||||
)
|
||||
return True
|
||||
return multi_user_support_possible()
|
||||
return False
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,10 @@ from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler impor
|
|||
)
|
||||
|
||||
# Registry of dataset-to-database handlers, keyed by the handler name used in
# configuration. Each entry is a dict with two keys:
#   "handler_instance": the handler object whose ``create_dataset`` is called
#   "handler_provider": the database provider name the handler is written for
# Each key is listed exactly once; the earlier bare-handler values were
# overridden by these entries and have been removed.
supported_dataset_database_handlers = {
    "neo4j_aura": {
        "handler_instance": Neo4jAuraDatasetDatabaseHandler,
        "handler_provider": "neo4j",
    },
    "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
    "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
from .supported_dataset_database_handlers import supported_dataset_database_handlers
|
||||
|
||||
|
||||
def use_dataset_database_handler(
    dataset_database_handler_name, dataset_database_handler, dataset_database_provider
):
    """Register (or replace) a dataset database handler under a given name.

    The entry is written into the shared ``supported_dataset_database_handlers``
    mapping; an existing entry with the same name is overwritten.

    Args:
        dataset_database_handler_name: Key under which the handler is stored.
        dataset_database_handler: Handler object, stored as ``"handler_instance"``.
        dataset_database_provider: Provider name the handler targets, stored as
            ``"handler_provider"``.
    """
    supported_dataset_database_handlers[dataset_database_handler_name] = {
        "handler_instance": dataset_database_handler,
        "handler_provider": dataset_database_provider,
    }
|
||||
|
|
|
|||
|
|
@ -1,13 +1,9 @@
|
|||
import os
|
||||
import asyncio
|
||||
import requests
|
||||
from uuid import UUID
|
||||
from typing import Union, Optional
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from cognee.base_config import get_base_config
|
||||
from cognee.modules.data.methods import create_dataset
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||
|
|
@ -25,7 +21,7 @@ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict:
|
|||
)
|
||||
|
||||
handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler]
|
||||
return await handler.create_dataset(dataset_id, user)
|
||||
return await handler["handler_instance"].create_dataset(dataset_id, user)
|
||||
|
||||
|
||||
async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
|
||||
|
|
@ -36,7 +32,7 @@ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
|
|||
)
|
||||
|
||||
handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler]
|
||||
return await handler.create_dataset(dataset_id, user)
|
||||
return await handler["handler_instance"].create_dataset(dataset_id, user)
|
||||
|
||||
|
||||
async def _existing_dataset_database(
|
||||
|
|
|
|||
|
|
@ -534,6 +534,10 @@ def setup_logging(log_level=None, name=None):
|
|||
# Get a configured logger and log system information
|
||||
logger = structlog.get_logger(name if name else __name__)
|
||||
|
||||
logger.warning(
|
||||
"From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation."
|
||||
)
|
||||
|
||||
if logs_dir is not None:
|
||||
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue