refactor: Remove global context handling from api code

This commit is contained in:
Igor Ilic 2025-05-20 21:39:55 +02:00
parent 6466f66a76
commit 9aa8e543cb
8 changed files with 13 additions and 22 deletions

View file

@ -74,4 +74,6 @@ LITELLM_LOG="ERROR"
# Relational: SQLite, Postgres
# Vector: LanceDB
# Graph: KuzuDB
#
# It enforces the use of LanceDB and KuzuDB, creating a separate database per Cognee user + dataset
ENABLE_BACKEND_ACCESS_CONTROL=False

View file

@ -41,9 +41,6 @@ def get_add_router() -> APIRouter:
raise ValueError("No dataset found with the provided datasetName.")
try:
# Set database information to be used for current cognify async context
await set_database_global_context_variables(datasetName, user)
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub

View file

@ -7,7 +7,6 @@ from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.data_models import KnowledgeGraph
from cognee.context_global_variables import set_database_global_context_variables
class CognifyPayloadDTO(BaseModel):
@ -21,24 +20,10 @@ def get_cognify_router() -> APIRouter:
@router.post("/", response_model=None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for the cognitive processing of the content."""
async def cognify_dataset(dataset, user, graph_model):
"""Process a single dataset in its own async task to allow use of context database values per dataset."""
# Set DB context for just this dataset
await set_database_global_context_variables(dataset, user)
# Run Cognify on this dataset
from cognee.api.v1.cognify import cognify as cognee_cognify
await cognee_cognify(dataset, user, graph_model)
from cognee.api.v1.cognify import cognify as cognee_cognify
try:
# Create cognify task for each dataset
tasks = [
cognify_dataset(dataset, user, payload.graph_model) for dataset in payload.datasets
]
# Wait for all datasets to finish.
await asyncio.gather(*tasks)
await cognee_cognify(payload.datasets, user, payload.graph_model)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})

View file

@ -4,6 +4,8 @@ from fastapi import APIRouter
from fastapi.responses import JSONResponse
# TODO: Add security verification for using these endpoints. We need to check that only users with
# Admin access from a Tenant can make changes by using these endpoints for their Tenant.
def get_permissions_router() -> APIRouter:
permissions_router = APIRouter()

View file

@ -3,7 +3,7 @@ from contextvars import ContextVar
from typing import Union
from uuid import UUID
from cognee.api.v1.infrastructure import get_or_create_dataset_database
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.models import User
# Note: ContextVar allows us to use different graph db configurations in Cognee
@ -15,7 +15,8 @@ graph_db_config = ContextVar("graph_db_config", default=None)
async def set_database_global_context_variables(dataset: Union[str, UUID], user: User):
"""
If backend access control is enabled this function will ensure all datasets have their own databases,
access to which will be enforced by given permissions. Database name will be determined by dataset_id.
access to which will be enforced by given permissions.
Database name will be determined by dataset_id and LanceDB and KuzuDB use will be enforced.
Note: This is only currently supported by the following databases:
Relational: SQLite, Postgres

View file

@ -14,6 +14,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.modules.pipelines.operations import log_pipeline_run_initiated
from cognee.context_global_variables import set_database_global_context_variables
from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
@ -131,6 +132,9 @@ async def run_pipeline(
):
check_dataset_name(dataset.name)
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
await set_database_global_context_variables(dataset.name, user)
# Ugly hack, but no easier way to do this.
if pipeline_name == "add_pipeline":
# Refresh the add pipeline status so data is added to a dataset.