diff --git a/.env.template b/.env.template index ae2cb1338..d178965e8 100644 --- a/.env.template +++ b/.env.template @@ -93,6 +93,8 @@ DB_NAME=cognee_db # Default (local file-based) GRAPH_DATABASE_PROVIDER="kuzu" +# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset +GRAPH_DATASET_DATABASE_HANDLER="kuzu" # -- To switch to Remote Kuzu uncomment and fill these: ------------------------------------------------------------- #GRAPH_DATABASE_PROVIDER="kuzu" @@ -117,6 +119,8 @@ VECTOR_DB_PROVIDER="lancedb" # Not needed if a cloud vector database is not used VECTOR_DB_URL= VECTOR_DB_KEY= +# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset +VECTOR_DATASET_DATABASE_HANDLER="lancedb" ################################################################################ # 🧩 Ontology resolver settings diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 2b6ffa058..0e7e16178 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -4,8 +4,8 @@ from typing import Union from uuid import UUID from cognee.base_config import get_base_config -from cognee.infrastructure.databases.vector.config import get_vectordb_context_config -from cognee.infrastructure.databases.graph.config import get_graph_context_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.infrastructure.databases.utils import get_or_create_dataset_database from cognee.infrastructure.files.storage.config import file_storage_config from cognee.modules.users.methods import get_user @@ -16,23 +16,59 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", 
default=None) -VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] -GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor", "neo4j"] - async def set_session_user_context_variable(user): session_user.set(user) def multi_user_support_possible(): - graph_db_config = get_graph_context_config() - vector_db_config = get_vectordb_context_config() - # TODO: Make sure dataset database handler and provider match, remove multi_user support check, add error if no dataset database handler exists for provider - return ( - graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT - and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT + graph_db_config = get_graph_config() + vector_db_config = get_vectordb_config() + + graph_handler = graph_db_config.graph_dataset_database_handler + vector_handler = vector_db_config.vector_dataset_database_handler + from cognee.infrastructure.databases.dataset_database_handler import ( + supported_dataset_database_handlers, ) + if graph_handler not in supported_dataset_database_handlers: + raise EnvironmentError( + "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected graph dataset to database handler: {graph_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if vector_handler not in supported_dataset_database_handlers: + raise EnvironmentError( + "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. 
Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected vector dataset to database handler: {vector_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if ( + supported_dataset_database_handlers[graph_handler]["handler_provider"] + != graph_db_config.graph_database_provider + ): + raise EnvironmentError( + "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected graph database provider: {graph_db_config.graph_database_provider}\n" + f"Selected graph dataset to database handler: {graph_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if ( + supported_dataset_database_handlers[vector_handler]["handler_provider"] + != vector_db_config.vector_db_provider + ): + raise EnvironmentError( + "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. 
Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected vector database provider: {vector_db_config.vector_db_provider}\n" + f"Selected vector dataset to database handler: {vector_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + return True + def backend_access_control_enabled(): backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None) @@ -42,12 +78,7 @@ def backend_access_control_enabled(): return multi_user_support_possible() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it - multi_user_support = multi_user_support_possible() - if not multi_user_support: - raise EnvironmentError( - "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." 
- ) - return True + return multi_user_support_possible() return False diff --git a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py index 9cc7d9f93..adaa45e33 100644 --- a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +++ b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py @@ -9,7 +9,10 @@ from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler impor ) supported_dataset_database_handlers = { - "neo4j_aura": Neo4jAuraDatasetDatabaseHandler, - "lancedb": LanceDBDatasetDatabaseHandler, - "kuzu": KuzuDatasetDatabaseHandler, + "neo4j_aura": { + "handler_instance": Neo4jAuraDatasetDatabaseHandler, + "handler_provider": "neo4j", + }, + "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"}, + "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"}, } diff --git a/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py index a583de354..bca2128ee 100644 --- a/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +++ b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py @@ -1,5 +1,10 @@ from .supported_dataset_database_handlers import supported_dataset_database_handlers -def use_dataset_database_handler(dataset_database_handler_name, dataset_database_handler): - supported_dataset_database_handlers[dataset_database_handler_name] = dataset_database_handler +def use_dataset_database_handler( + dataset_database_handler_name, dataset_database_handler, dataset_database_provider +): + supported_dataset_database_handlers[dataset_database_handler_name] = { + 
"handler_instance": dataset_database_handler, + "handler_provider": dataset_database_provider, + } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index f4bacca7e..665355e30 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -1,13 +1,9 @@ -import os -import asyncio -import requests from uuid import UUID from typing import Union, Optional from sqlalchemy import select from sqlalchemy.exc import IntegrityError -from cognee.base_config import get_base_config from cognee.modules.data.methods import create_dataset from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.vector import get_vectordb_config @@ -25,7 +21,7 @@ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: ) handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler] - return await handler.create_dataset(dataset_id, user) + return await handler["handler_instance"].create_dataset(dataset_id, user) async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: @@ -36,7 +32,7 @@ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: ) handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler] - return await handler.create_dataset(dataset_id, user) + return await handler["handler_instance"].create_dataset(dataset_id, user) async def _existing_dataset_database( diff --git a/cognee/shared/logging_utils.py b/cognee/shared/logging_utils.py index e8efde72c..70a0bd37e 100644 --- a/cognee/shared/logging_utils.py +++ b/cognee/shared/logging_utils.py @@ -534,6 +534,10 @@ def setup_logging(log_level=None, name=None): # Get a configured logger and log system information logger = structlog.get_logger(name if name else __name__) + 
logger.warning( + "From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation." + ) + if logs_dir is not None: logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)