refactor: Enable multi user mode by default if graph and vector db providers support it

This commit is contained in:
Igor Ilic 2025-10-29 16:28:09 +01:00
parent 76396d5d27
commit fb7e74eaa8
7 changed files with 53 additions and 38 deletions

View file

@ -169,8 +169,9 @@ REQUIRE_AUTHENTICATION=False
# Vector: LanceDB # Vector: LanceDB
# Graph: KuzuDB # Graph: KuzuDB
# #
# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee user + dataset # It enforces creation of databases per Cognee user + dataset. Does not work with some graph and vector database providers.
ENABLE_BACKEND_ACCESS_CONTROL=False # Disable this mode when using unsupported graph/vector databases.
ENABLE_BACKEND_ACCESS_CONTROL=True
################################################################################ ################################################################################
# ☁️ Cloud Sync Settings # ☁️ Cloud Sync Settings

View file

@ -4,6 +4,8 @@ from typing import Union
from uuid import UUID from uuid import UUID
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
from cognee.infrastructure.databases.graph.config import get_graph_context_config
from cognee.infrastructure.databases.utils import get_or_create_dataset_database from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.infrastructure.files.storage.config import file_storage_config from cognee.infrastructure.files.storage.config import file_storage_config
from cognee.modules.users.methods import get_user from cognee.modules.users.methods import get_user
@ -14,11 +16,50 @@ vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None)
session_user = ContextVar("session_user", default=None) session_user = ContextVar("session_user", default=None)
vector_dbs_with_multi_user_support = ["lancedb"]
graph_dbs_with_multi_user_support = ["kuzu"]
async def set_session_user_context_variable(user): async def set_session_user_context_variable(user):
session_user.set(user) session_user.set(user)
def check_multi_user_support():
    """
    Report whether the configured graph and vector databases both support multi-user mode.

    The provider names are read from the graph and vector context configs and checked
    against the module-level allow-lists ``graph_dbs_with_multi_user_support`` and
    ``vector_dbs_with_multi_user_support``.

    Returns:
        bool: True only when both the graph provider and the vector provider are on
        their respective allow-lists, otherwise False.
    """
    # Local names intentionally differ from the module-level ``graph_db_config`` and
    # ``vector_db_config`` ContextVars so those are not shadowed inside this function.
    graph_config = get_graph_context_config()
    vector_config = get_vectordb_context_config()

    return (
        graph_config["graph_database_provider"] in graph_dbs_with_multi_user_support
        and vector_config["vector_db_provider"] in vector_dbs_with_multi_user_support
    )
def check_backend_access_control_mode():
    """
    Resolve whether backend access control is active, as the string "true" or "false".

    The decision is driven by the ENABLE_BACKEND_ACCESS_CONTROL environment variable:
        - not set: "true" if check_multi_user_support() reports support, else "false"
        - "true" (any letter case): "true", provided the databases support it
        - any other value: "false"

    Returns:
        str: "true" when backend access control is in effect, "false" otherwise.

    Raises:
        EnvironmentError: when the variable is explicitly "true" but
        check_multi_user_support() returns a falsy value.
    """
    env_value = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)

    if env_value is None:
        # Nothing configured: follow whatever the current graph/vector DBs can support
        return "true" if check_multi_user_support() else "false"

    if env_value.lower() != "true":
        # Explicitly set to something other than "true": treat as disabled
        return "false"

    # Explicitly enabled: the configured databases must actually be able to honour it
    if check_multi_user_support():
        return "true"

    raise EnvironmentError(
        "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
    )
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
""" """
If backend access control is enabled this function will ensure all datasets have their own databases, If backend access control is enabled this function will ensure all datasets have their own databases,
@ -40,7 +81,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
base_config = get_base_config() base_config = get_base_config()
if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": if not check_backend_access_control_mode() == "true":
return return
user = await get_user(user_id) user = await get_user(user_id)

View file

@ -1,4 +1,3 @@
import os
import json import json
import asyncio import asyncio
from uuid import UUID from uuid import UUID
@ -9,6 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.shared.utils import send_telemetry from cognee.shared.utils import send_telemetry
from cognee.context_global_variables import set_database_global_context_variables from cognee.context_global_variables import set_database_global_context_variables
from cognee.context_global_variables import check_backend_access_control_mode
from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
@ -74,7 +74,7 @@ async def search(
) )
# Use search function filtered by permissions if access control is enabled # Use search function filtered by permissions if access control is enabled
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": if check_backend_access_control_mode() == "true":
search_results = await authorized_search( search_results = await authorized_search(
query_type=query_type, query_type=query_type,
query_text=query_text, query_text=query_text,
@ -156,7 +156,7 @@ async def search(
) )
else: else:
# This is for maintaining backwards compatibility # This is for maintaining backwards compatibility
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": if check_backend_access_control_mode() == "true":
return_value = [] return_value = []
for search_result in search_results: for search_result in search_results:
prepared_search_results = await prepare_search_result(search_result) prepared_search_results = await prepare_search_result(search_result)

View file

@ -5,6 +5,7 @@ from ..models import User
from ..get_fastapi_users import get_fastapi_users from ..get_fastapi_users import get_fastapi_users
from .get_default_user import get_default_user from .get_default_user import get_default_user
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.context_global_variables import check_backend_access_control_mode
logger = get_logger("get_authenticated_user") logger = get_logger("get_authenticated_user")
@ -12,7 +13,7 @@ logger = get_logger("get_authenticated_user")
# Check environment variable to determine authentication requirement # Check environment variable to determine authentication requirement
REQUIRE_AUTHENTICATION = ( REQUIRE_AUTHENTICATION = (
os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true" or check_backend_access_control_mode() == "true"
) )
fastapi_users = get_fastapi_users() fastapi_users = get_fastapi_users()

View file

@ -31,6 +31,9 @@ from cognee.infrastructure.databases.vector.pgvector import (
async def main(): async def main():
# Disable backend access control to migrate relational data
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
# Clean all data stored in Cognee # Clean all data stored in Cognee
await cognee.prune.prune_data() await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True) await cognee.prune.prune_system(metadata=True)

View file

View file

@ -1,31 +0,0 @@
# Logs Directory
This directory contains the application logs for Cognee.
## Log Files
- Log files are named by date in the format `YYYY-MM-DD_HH-MM-SS.log`
- Logs are stored in plain text format with a consistent structure
- Each log entry includes:
- Timestamp (ISO format)
- Log level (padded to consistent width)
- Message
- Additional context (if any)
- Logger name (in square brackets)
- Exception tracebacks are included for error logs
## Sample Log Entry
```
2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger]
```
## Retention Policy
The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments.
## Usage
Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature.
The logs directory structure is preserved in version control, but the log files themselves are gitignored.