refactor: Add better handling of configuration for dataset to database handler
This commit is contained in:
parent
64a3ee96c4
commit
593f17fcdc
6 changed files with 71 additions and 28 deletions
|
|
@ -93,6 +93,8 @@ DB_NAME=cognee_db
|
||||||
|
|
||||||
# Default (local file-based)
|
# Default (local file-based)
|
||||||
GRAPH_DATABASE_PROVIDER="kuzu"
|
GRAPH_DATABASE_PROVIDER="kuzu"
|
||||||
|
# Handler used in multi-user access control mode: it determines how a separate graph database is created and mapped for each Cognee dataset
|
||||||
|
GRAPH_DATASET_DATABASE_HANDLER="kuzu"
|
||||||
|
|
||||||
# -- To switch to Remote Kuzu uncomment and fill these: -------------------------------------------------------------
|
# -- To switch to Remote Kuzu uncomment and fill these: -------------------------------------------------------------
|
||||||
#GRAPH_DATABASE_PROVIDER="kuzu"
|
#GRAPH_DATABASE_PROVIDER="kuzu"
|
||||||
|
|
@ -117,6 +119,8 @@ VECTOR_DB_PROVIDER="lancedb"
|
||||||
# Not needed if a cloud vector database is not used
|
# Not needed if a cloud vector database is not used
|
||||||
VECTOR_DB_URL=
|
VECTOR_DB_URL=
|
||||||
VECTOR_DB_KEY=
|
VECTOR_DB_KEY=
|
||||||
|
# Handler used in multi-user access control mode: it determines how a separate vector database is created and mapped for each Cognee dataset
|
||||||
|
VECTOR_DATASET_DATABASE_HANDLER="lancedb"
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# 🧩 Ontology resolver settings
|
# 🧩 Ontology resolver settings
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,8 @@ from typing import Union
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_context_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
|
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
|
||||||
from cognee.infrastructure.files.storage.config import file_storage_config
|
from cognee.infrastructure.files.storage.config import file_storage_config
|
||||||
from cognee.modules.users.methods import get_user
|
from cognee.modules.users.methods import get_user
|
||||||
|
|
@ -16,23 +16,59 @@ vector_db_config = ContextVar("vector_db_config", default=None)
|
||||||
graph_db_config = ContextVar("graph_db_config", default=None)
|
graph_db_config = ContextVar("graph_db_config", default=None)
|
||||||
session_user = ContextVar("session_user", default=None)
|
session_user = ContextVar("session_user", default=None)
|
||||||
|
|
||||||
VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
|
|
||||||
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor", "neo4j"]
|
|
||||||
|
|
||||||
|
|
||||||
async def set_session_user_context_variable(user):
|
async def set_session_user_context_variable(user):
|
||||||
session_user.set(user)
|
session_user.set(user)
|
||||||
|
|
||||||
|
|
||||||
def multi_user_support_possible():
|
def multi_user_support_possible():
|
||||||
graph_db_config = get_graph_context_config()
|
graph_db_config = get_graph_config()
|
||||||
vector_db_config = get_vectordb_context_config()
|
vector_db_config = get_vectordb_config()
|
||||||
# TODO: Make sure dataset database handler and provider match, remove multi_user support check, add error if no dataset database handler exists for provider
|
|
||||||
return (
|
graph_handler = graph_db_config.graph_dataset_database_handler
|
||||||
graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
|
vector_handler = vector_db_config.vector_dataset_database_handler
|
||||||
and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
|
from cognee.infrastructure.databases.dataset_database_handler import (
|
||||||
|
supported_dataset_database_handlers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if graph_handler not in supported_dataset_database_handlers:
|
||||||
|
raise EnvironmentError(
|
||||||
|
"Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
|
||||||
|
f"Selected graph dataset to database handler: {graph_handler}\n"
|
||||||
|
f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if vector_handler not in supported_dataset_database_handlers:
|
||||||
|
raise EnvironmentError(
|
||||||
|
"Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
|
||||||
|
f"Selected vector dataset to database handler: {vector_handler}\n"
|
||||||
|
f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
supported_dataset_database_handlers[graph_handler]["handler_provider"]
|
||||||
|
!= graph_db_config.graph_database_provider
|
||||||
|
):
|
||||||
|
raise EnvironmentError(
|
||||||
|
"The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
|
||||||
|
f"Selected graph database provider: {graph_db_config.graph_database_provider}\n"
|
||||||
|
f"Selected graph dataset to database handler: {graph_handler}\n"
|
||||||
|
f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
supported_dataset_database_handlers[vector_handler]["handler_provider"]
|
||||||
|
!= vector_db_config.vector_db_provider
|
||||||
|
):
|
||||||
|
raise EnvironmentError(
|
||||||
|
"The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
|
||||||
|
f"Selected vector database provider: {vector_db_config.vector_db_provider}\n"
|
||||||
|
f"Selected vector dataset to database handler: {vector_handler}\n"
|
||||||
|
f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def backend_access_control_enabled():
|
def backend_access_control_enabled():
|
||||||
backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
|
backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
|
||||||
|
|
@ -42,12 +78,7 @@ def backend_access_control_enabled():
|
||||||
return multi_user_support_possible()
|
return multi_user_support_possible()
|
||||||
elif backend_access_control.lower() == "true":
|
elif backend_access_control.lower() == "true":
|
||||||
# If enabled, ensure that the current graph and vector DBs can support it
|
# If enabled, ensure that the current graph and vector DBs can support it
|
||||||
multi_user_support = multi_user_support_possible()
|
return multi_user_support_possible()
|
||||||
if not multi_user_support:
|
|
||||||
raise EnvironmentError(
|
|
||||||
"ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,10 @@ from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler impor
|
||||||
)
|
)
|
||||||
|
|
||||||
supported_dataset_database_handlers = {
|
supported_dataset_database_handlers = {
|
||||||
"neo4j_aura": Neo4jAuraDatasetDatabaseHandler,
|
"neo4j_aura": {
|
||||||
"lancedb": LanceDBDatasetDatabaseHandler,
|
"handler_instance": Neo4jAuraDatasetDatabaseHandler,
|
||||||
"kuzu": KuzuDatasetDatabaseHandler,
|
"handler_provider": "neo4j",
|
||||||
|
},
|
||||||
|
"lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
|
||||||
|
"kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,10 @@
|
||||||
from .supported_dataset_database_handlers import supported_dataset_database_handlers
|
from .supported_dataset_database_handlers import supported_dataset_database_handlers
|
||||||
|
|
||||||
|
|
||||||
def use_dataset_database_handler(dataset_database_handler_name, dataset_database_handler):
|
def use_dataset_database_handler(
|
||||||
supported_dataset_database_handlers[dataset_database_handler_name] = dataset_database_handler
|
dataset_database_handler_name, dataset_database_handler, dataset_database_provider
|
||||||
|
):
|
||||||
|
supported_dataset_database_handlers[dataset_database_handler_name] = {
|
||||||
|
"handler_instance": dataset_database_handler,
|
||||||
|
"handler_provider": dataset_database_provider,
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,9 @@
|
||||||
import os
|
|
||||||
import asyncio
|
|
||||||
import requests
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
|
||||||
from cognee.modules.data.methods import create_dataset
|
from cognee.modules.data.methods import create_dataset
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
from cognee.infrastructure.databases.vector import get_vectordb_config
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
|
@ -25,7 +21,7 @@ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict:
|
||||||
)
|
)
|
||||||
|
|
||||||
handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler]
|
handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler]
|
||||||
return await handler.create_dataset(dataset_id, user)
|
return await handler["handler_instance"].create_dataset(dataset_id, user)
|
||||||
|
|
||||||
|
|
||||||
async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
|
async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
|
||||||
|
|
@ -36,7 +32,7 @@ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
|
||||||
)
|
)
|
||||||
|
|
||||||
handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler]
|
handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler]
|
||||||
return await handler.create_dataset(dataset_id, user)
|
return await handler["handler_instance"].create_dataset(dataset_id, user)
|
||||||
|
|
||||||
|
|
||||||
async def _existing_dataset_database(
|
async def _existing_dataset_database(
|
||||||
|
|
|
||||||
|
|
@ -534,6 +534,10 @@ def setup_logging(log_level=None, name=None):
|
||||||
# Get a configured logger and log system information
|
# Get a configured logger and log system information
|
||||||
logger = structlog.get_logger(name if name else __name__)
|
logger = structlog.get_logger(name if name else __name__)
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
"From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation."
|
||||||
|
)
|
||||||
|
|
||||||
if logs_dir is not None:
|
if logs_dir is not None:
|
||||||
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
|
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue