159 lines
7.7 KiB
Python
159 lines
7.7 KiB
Python
import os
|
|
from contextvars import ContextVar
|
|
from typing import Union
|
|
from uuid import UUID
|
|
|
|
from cognee.base_config import get_base_config
|
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
|
|
from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info
|
|
from cognee.infrastructure.files.storage.config import file_storage_config
|
|
from cognee.modules.users.methods import get_user
|
|
|
|
# Note: ContextVars let Cognee use different vector/graph database
# configurations (and a different session user) for different async tasks,
# threads and processes.
# Each variable defaults to None until a value is set for the current context.
vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None)
session_user = ContextVar("session_user", default=None)
|
|
|
|
|
|
async def set_session_user_context_variable(user):
    """Bind ``user`` to the ``session_user`` context variable.

    Args:
        user: Value to store as the session user for the current context.

    Returns:
        None.
    """
    # ContextVar.set only affects the context this coroutine is running in.
    session_user.set(user)
|
|
|
|
|
|
def multi_user_support_possible():
    """Verify that the configured databases can back multi-user access control.

    Reads the graph and vector database configurations and checks, in order:

    1. the graph dataset-to-database handler is a key of
       ``supported_dataset_database_handlers``;
    2. the vector dataset-to-database handler is a key of the same mapping;
    3. the graph handler's ``handler_provider`` equals the configured graph
       database provider;
    4. the vector handler's ``handler_provider`` equals the configured vector
       database provider.

    Returns:
        True when all four checks pass.

    Raises:
        EnvironmentError: On the first check that fails; the message names the
            offending selection and lists the supported handlers.
    """
    # Local names intentionally differ from the module-level ContextVars
    # ``graph_db_config`` / ``vector_db_config`` so those are not shadowed.
    graph_settings = get_graph_config()
    vector_settings = get_vectordb_config()

    selected_graph_handler = graph_settings.graph_dataset_database_handler
    selected_vector_handler = vector_settings.vector_dataset_database_handler

    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from cognee.infrastructure.databases.dataset_database_handler import (
        supported_dataset_database_handlers as handlers,
    )

    # 1) Graph handler must be registered.
    if selected_graph_handler not in handlers:
        raise EnvironmentError(
            "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
            f"Selected graph dataset to database handler: {selected_graph_handler}\n"
            f"Supported dataset to database handlers: {list(handlers.keys())}\n"
        )

    # 2) Vector handler must be registered.
    if selected_vector_handler not in handlers:
        raise EnvironmentError(
            "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
            f"Selected vector dataset to database handler: {selected_vector_handler}\n"
            f"Supported dataset to database handlers: {list(handlers.keys())}\n"
        )

    # 3) Graph handler must target the configured graph database provider.
    graph_provider_required = handlers[selected_graph_handler]["handler_provider"]
    graph_provider_configured = graph_settings.graph_database_provider
    if graph_provider_required != graph_provider_configured:
        raise EnvironmentError(
            "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
            f"Selected graph database provider: {graph_provider_configured}\n"
            f"Selected graph dataset to database handler: {selected_graph_handler}\n"
            f"Supported dataset to database handlers: {list(handlers.keys())}\n"
        )

    # 4) Vector handler must target the configured vector database provider.
    vector_provider_required = handlers[selected_vector_handler]["handler_provider"]
    vector_provider_configured = vector_settings.vector_db_provider
    if vector_provider_required != vector_provider_configured:
        raise EnvironmentError(
            "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
            f"Selected vector database provider: {vector_provider_configured}\n"
            f"Selected vector dataset to database handler: {selected_vector_handler}\n"
            f"Supported dataset to database handlers: {list(handlers.keys())}\n"
        )

    return True
|
|
|
|
|
|
def backend_access_control_enabled():
    """Report whether backend (multi-user) access control is in effect.

    The decision is driven by the ``ENABLE_BACKEND_ACCESS_CONTROL``
    environment variable:

    * unset, or ``"true"`` in any letter case: the result of
      ``multi_user_support_possible()`` is returned. That helper returns True
      or raises ``EnvironmentError`` when the configured graph/vector
      databases cannot support access control -- it does not return False, so
      an unsupported setup surfaces as an error rather than being disabled
      silently.
    * any other value: access control is off.

    Returns:
        True when access control is enabled and supported, False when it has
        been switched off through the environment variable.

    Raises:
        EnvironmentError: Propagated from ``multi_user_support_possible()``.
    """
    flag = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL")

    # Explicitly set to something other than "true": switched off.
    if flag is not None and flag.lower() != "true":
        return False

    # Unset (enabled by default) or explicitly "true": make sure the current
    # graph and vector databases can actually support it.
    return multi_user_support_possible()
|
|
|
|
|
|
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
    """
    Route the current context to the databases that belong to ``dataset``.

    When backend access control is enabled, the dataset's own vector and graph
    databases are looked up (or created), and their connection settings are
    stored in the ``graph_db_config``, ``vector_db_config`` and
    ``file_storage_config`` context variables. When backend access control is
    disabled the function returns without changing anything.

    Note (carried over from the earlier documentation of this function; not
    verifiable from this function alone -- confirm against the supported
    dataset database handlers): supported databases were listed as
        Relational: SQLite, Postgres
        Vector: LanceDB
        Graph: KuzuDB

    Args:
        dataset: Cognee dataset name or id
        user_id: UUID of the owner of the dataset

    Returns:
        None
    """
    if not backend_access_control_enabled():
        return

    owner = await get_user(user_id)

    # To enforce permissions properly every dataset has its own databases.
    dataset_db_record = await get_or_create_dataset_database(dataset, owner)
    # Make sure all connection info is resolved before it is read below.
    dataset_db = await resolve_dataset_database_connection_info(dataset_db_record)

    base_config = get_base_config()
    # File storage root is keyed by tenant id, falling back to the user id.
    owner_data_root = os.path.join(
        base_config.data_root_directory, str(owner.tenant_id or owner.id)
    )
    # Database files are always keyed by the user id.
    owner_databases_dir = os.path.join(
        base_config.system_root_directory, "databases", str(owner.id)
    )
    graph_file_path = os.path.join(owner_databases_dir, dataset_db.graph_database_name)
    graph_connection_info = dataset_db.graph_database_connection_info

    # Build the vector and graph configuration from the dataset database record.
    # TODO: Add better handling of vector and graph config across Cognee.
    # Key order matters: per the original note, LRU caching takes the order of
    # inputs into account, so a changed order would be registered as a new
    # DB adapter. Keep the keys below in this exact order.
    vector_settings = {
        "vector_db_provider": dataset_db.vector_database_provider,
        "vector_db_url": dataset_db.vector_database_url,
        "vector_db_key": dataset_db.vector_database_key,
        "vector_db_name": dataset_db.vector_database_name,
    }

    graph_settings = {
        "graph_database_provider": dataset_db.graph_database_provider,
        "graph_database_url": dataset_db.graph_database_url,
        "graph_database_name": dataset_db.graph_database_name,
        "graph_database_key": dataset_db.graph_database_key,
        "graph_file_path": graph_file_path,
        "graph_database_username": graph_connection_info.get("graph_database_username", ""),
        "graph_database_password": graph_connection_info.get("graph_database_password", ""),
        "graph_dataset_database_handler": "",
        "graph_database_port": "",
    }

    storage_settings = {
        "data_root_directory": owner_data_root,
    }

    # Publish the configurations through ContextVars so they are used
    # in the current async context across Cognee.
    graph_db_config.set(graph_settings)
    vector_db_config.set(vector_settings)
    file_storage_config.set(storage_settings)
|