feat: new Dataset permissions (#869)


## Description
This PR introduces dataset-level permissions and optional backend access control. When `ENABLE_BACKEND_ACCESS_CONTROL` is enabled, every dataset gets its own LanceDB vector database and KuzuDB graph database, and read/write/share permissions are enforced per dataset. The `add`, `cognify`, and `search` APIs now accept dataset UUIDs in addition to dataset names (names only resolve to datasets owned by the request sender), and new permission endpoints allow granting dataset access to principals (users, roles, and tenants).

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
Co-authored-by: Boris <boris@topoteretes.com>
Igor Ilic 2025-06-06 14:20:57 +02:00 committed by GitHub
parent ebebbb8958
commit 1ed6cfd918
76 changed files with 5322 additions and 4154 deletions

View file

@ -69,3 +69,11 @@ LITELLM_LOG="ERROR"
# Set this environment variable to disable sending telemetry data
# TELEMETRY_DISABLED=1
# Set this variable to True to enforce usage of backend access control for Cognee
# Note: This is currently only supported by the following databases:
# Relational: SQLite, Postgres
# Vector: LanceDB
# Graph: KuzuDB
#
# It enforces the use of LanceDB and KuzuDB and creates a separate database per Cognee user + dataset
ENABLE_BACKEND_ACCESS_CONTROL=False
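For reference, the rest of this PR reads the flag with a plain, case-insensitive string comparison. A minimal sketch of that check (the helper name is hypothetical):

```python
import os

def backend_access_control_enabled() -> bool:
    # Hypothetical helper mirroring the check used throughout this PR:
    # anything other than the string "true" (case-insensitive) means disabled.
    return os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
```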

View file

@ -215,3 +215,34 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_s3.py
test-parallel-databases:
name: Test using different async databases in parallel in Cognee
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Install specific graph db dependency
run: |
poetry install -E kuzu
- name: Run parallel databases test
env:
ENV: 'dev'
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_parallel_databases.py

View file

@ -47,7 +47,7 @@ jobs:
relational-db-migration-tests:
name: Relational DB Migration Tests
needs: [ basic-tests, e2e-tests ]
needs: [ basic-tests, e2e-tests, graph-db-tests]
uses: ./.github/workflows/relational_db_migration_tests.yml
secrets: inherit
@ -79,7 +79,7 @@ jobs:
db-examples-tests:
name: DB Examples Tests
needs: [vector-db-tests]
needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
uses: ./.github/workflows/db_examples_tests.yml
secrets: inherit

View file

@ -135,6 +135,16 @@ jobs:
run:
shell: bash
services:
qdrant:
image: qdrant/qdrant:v1.14.1
env:
QDRANT__LOG_LEVEL: ERROR
QDRANT__SERVICE__API_KEY: qdrant_api_key
QDRANT__SERVICE__ENABLE_TLS: 0
ports:
- 6333:6333
steps:
- name: Check out
uses: actions/checkout@master
@ -148,6 +158,19 @@ jobs:
run: |
poetry install -E qdrant
- name: Wait for Qdrant to be healthy
run: |
for i in {1..10}; do
if curl -f http://127.0.0.1:6333/healthz; then
echo "Qdrant is healthy!"
exit 0
fi
echo "Waiting for Qdrant to be healthy..."
sleep 3
done
echo "Qdrant failed to become healthy in time"
exit 1
- name: Run default Qdrant
env:
ENV: 'dev'
@ -159,8 +182,8 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
VECTOR_DB_URL: 127.0.0.1
VECTOR_DB_KEY: qdrant_api_key
run: poetry run python ./cognee/tests/test_qdrant.py
run-postgres-tests:

View file

@ -1,6 +1,7 @@
"""FastAPI server for the Cognee API."""
import os
import uvicorn
from cognee.shared.logging_utils import get_logger
import sentry_sdk
@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(debug=app_environment != "prod", lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import Union, BinaryIO, List, Optional
from cognee.modules.pipelines import Task
@ -11,9 +12,21 @@ async def add(
dataset_name: str = "main_dataset",
user: User = None,
node_set: Optional[List[str]] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
dataset_id: UUID = None,
):
tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)]
tasks = [
Task(resolve_data_directories),
Task(ingest_data, dataset_name, user, node_set, dataset_id),
]
await cognee_pipeline(
tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline"
tasks=tasks,
datasets=dataset_id if dataset_id else dataset_name,
data=data,
user=user,
pipeline_name="add_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
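A minimal usage sketch of the extended `add` signature, assuming `cognee.add` forwards to the function above; the UUID is a placeholder for an existing dataset:

```python
import asyncio
from uuid import UUID

import cognee

async def main():
    existing_dataset_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder UUID

    # When dataset_id is provided, the pipeline targets that dataset directly;
    # otherwise the data goes to the dataset identified by dataset_name.
    await cognee.add(
        "Natural language processing is a subfield of computer science.",
        dataset_name="main_dataset",
        dataset_id=existing_dataset_id,
    )

asyncio.run(main())
```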

View file

@ -1,4 +1,5 @@
from uuid import UUID
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
@ -20,8 +21,8 @@ def get_add_router() -> APIRouter:
@router.post("/", response_model=None)
async def add(
data: List[UploadFile],
datasetName: str,
datasetId: Optional[UUID] = Form(default=None),
datasetName: Optional[str] = Form(default=None),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for adding data to the graph."""
@ -30,19 +31,13 @@ def get_add_router() -> APIRouter:
if not datasetId and not datasetName:
raise ValueError("Either datasetId or datasetName must be provided.")
if datasetId and not datasetName:
dataset = await get_dataset(user_id=user.id, dataset_id=datasetId)
try:
datasetName = dataset.name
except IndexError:
raise ValueError("No dataset found with the provided datasetName.")
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
# TODO: Update add call with dataset info
await cognee_add(
"data://.data/",
f"{repo_name}",
@ -53,10 +48,10 @@ def get_add_router() -> APIRouter:
response.raise_for_status()
file_data = await response.content()
# TODO: Update add call with dataset info
return await cognee_add(file_data)
else:
await cognee_add(data, datasetName, user=user)
await cognee_add(data, dataset_name=datasetName, user=user, dataset_id=datasetId)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})

View file

@ -9,7 +9,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.documents import (
check_permissions_on_documents,
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,11 +31,18 @@ async def cognify(
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
):
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
return await cognee_pipeline(
tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
tasks=tasks,
datasets=datasets,
user=user,
pipeline_name="cognify_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
@ -48,7 +55,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
) -> list[Task]:
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user=user, permissions=["write"]),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents,
max_chunk_size=chunk_size or get_max_chunk_tokens(),
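The new `vector_db_config` / `graph_db_config` parameters let a caller pin a single `cognify` run to dedicated databases. A hedged sketch that reuses the config keys from `set_database_global_context_variables` (added later in this PR); the file paths are placeholders:

```python
import asyncio

import cognee

async def main():
    vector_db_config = {
        "vector_db_provider": "lancedb",
        "vector_db_url": "/tmp/example.lance.db",  # placeholder path
        "vector_db_key": "",
    }
    graph_db_config = {
        "graph_database_provider": "kuzu",
        "graph_file_path": "/tmp/example.pkl",  # placeholder path
    }
    await cognee.cognify(
        datasets=["main_dataset"],
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
    )

asyncio.run(main())
```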

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import List, Optional
from pydantic import BaseModel
from fastapi import Depends
@ -10,6 +11,7 @@ from cognee.shared.data_models import KnowledgeGraph
class CognifyPayloadDTO(BaseModel):
datasets: List[str]
dataset_ids: Optional[List[UUID]]
graph_model: Optional[BaseModel] = KnowledgeGraph
@ -22,7 +24,9 @@ def get_cognify_router() -> APIRouter:
from cognee.api.v1.cognify import cognify as cognee_cognify
try:
await cognee_cognify(payload.datasets, user, payload.graph_model)
# Send dataset UUIDs if they are given; otherwise send dataset names
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
await cognee_cognify(datasets, user, payload.graph_model)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
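A client-side sketch of the updated payload; the server URL and route prefix are assumptions. When `dataset_ids` is present it takes precedence over the dataset names:

```python
import requests  # client-side sketch only

payload = {
    "datasets": ["main_dataset"],
    "dataset_ids": ["00000000-0000-0000-0000-000000000000"],  # placeholder UUID, optional
}
response = requests.post(
    "http://localhost:8000/api/v1/cognify",  # assumed mount point
    json=payload,
    headers={"Authorization": "Bearer <JWT>"},
)
print(response.status_code)
```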

View file

@ -1,66 +1,69 @@
from uuid import UUID
from typing import List
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
def get_permissions_router() -> APIRouter:
permissions_router = APIRouter()
@permissions_router.post("/roles/{role_id}/permissions")
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_role as set_default_permission_to_role,
@permissions_router.post("/datasets/{principal_id}/")
async def give_datasets_permission_to_principal(
permission_name: str,
dataset_ids: List[UUID],
principal_id: UUID,
user: User = Depends(get_authenticated_user),
):
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
await authorized_give_permission_on_datasets(
principal_id,
[dataset_id for dataset_id in dataset_ids],
permission_name,
user.id,
)
await set_default_permission_to_role(role_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to role"})
@permissions_router.post("/tenants/{tenant_id}/permissions")
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_tenant as set_tenant_default_permissions,
return JSONResponse(
status_code=200, content={"message": "Permission assigned to principal"}
)
await set_tenant_default_permissions(tenant_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to tenant"})
@permissions_router.post("/users/{user_id}/permissions")
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_user as set_default_permission_to_user,
)
await set_default_permission_to_user(user_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to user"})
@permissions_router.post("/roles")
async def create_role(
role_name: str,
tenant_id: UUID,
):
async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.users.roles.methods import create_role as create_role_method
await create_role_method(role_name=role_name, tenant_id=tenant_id)
await create_role_method(role_name=role_name, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "Role created for tenant"})
@permissions_router.post("/users/{user_id}/roles")
async def add_user_to_role(user_id: UUID, role_id: UUID):
async def add_user_to_role(
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
await add_user_to_role_method(user_id=user_id, role_id=role_id)
await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to role"})
@permissions_router.post("/users/{user_id}/tenants")
async def add_user_to_tenant(
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.tenants.methods import add_user_to_tenant
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to tenant"})
@permissions_router.post("/tenants")
async def create_tenant(tenant_name: str):
async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
await create_tenant_method(tenant_name=tenant_name)
await create_tenant_method(tenant_name=tenant_name, user_id=user.id)
return JSONResponse(status_code=200, content={"message": "Tenant created."})

View file

@ -1,4 +1,5 @@
from uuid import UUID
from typing import Optional, Union
from datetime import datetime
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
@ -9,8 +10,12 @@ from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO):
search_type: SearchType
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
@ -39,7 +44,11 @@ def get_search_router() -> APIRouter:
try:
results = await cognee_search(
query_text=payload.query, query_type=payload.search_type, user=user
query_text=payload.query,
query_type=payload.search_type,
user=user,
datasets=payload.datasets,
dataset_ids=payload.dataset_ids,
)
return results

View file

@ -1,32 +1,43 @@
from uuid import UUID
from typing import Union, Optional, List, Type
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
async def search(
query_text: str,
query_type: SearchType = SearchType.GRAPH_COMPLETION,
user: User = None,
datasets: Union[list[str], str, None] = None,
datasets: Optional[Union[list[str], str]] = None,
dataset_ids: Optional[Union[list[UUID], UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
) -> list:
# We use lists from now on for datasets
if isinstance(datasets, str):
if isinstance(datasets, UUID) or isinstance(datasets, str):
datasets = [datasets]
if user is None:
user = await get_default_user()
# Transform string-based datasets to UUIDs - string-based datasets can only be found for the current user
if datasets is not None and all(isinstance(dataset, str) for dataset in datasets):
datasets = await get_authorized_existing_datasets(datasets, "read", user)
datasets = [dataset.id for dataset in datasets]
if not datasets:
raise DatasetNotFoundError(message="No datasets found.")
filtered_search_results = await search_function(
query_text=query_text,
query_type=query_type,
datasets=datasets,
dataset_ids=dataset_ids if dataset_ids else datasets,
user=user,
system_prompt_path=system_prompt_path,
top_k=top_k,
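A usage sketch of the updated search entry point, assuming `cognee.search` maps to the function above; the UUID is a placeholder for a dataset shared by another user:

```python
import asyncio
from uuid import UUID

import cognee

async def main():
    # Dataset names only resolve to datasets owned by the caller ...
    own_results = await cognee.search(
        query_text="What is in my notes?",
        datasets=["main_dataset"],
    )
    # ... while a dataset shared by another user has to be addressed by its UUID.
    shared_results = await cognee.search(
        query_text="What is in the shared dataset?",
        datasets=[UUID("00000000-0000-0000-0000-000000000003")],  # placeholder UUID
    )
    print(own_results, shared_results)

asyncio.run(main())
```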

View file

@ -0,0 +1,67 @@
import os
import pathlib
from contextvars import ContextVar
from typing import Union
from uuid import UUID
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_user
# Note: ContextVar allows us to use different graph db configurations in Cognee
# for different async tasks, threads and processes
vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None)
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
"""
If backend access control is enabled, this function ensures all datasets have their own databases,
access to which is enforced through the granted permissions.
Database names are derived from the dataset_id, and the use of LanceDB and KuzuDB is enforced.
Note: This is only currently supported by the following databases:
Relational: SQLite, Postgres
Vector: LanceDB
Graph: KuzuDB
Args:
dataset: Cognee dataset name or id
user_id: UUID of the owner of the dataset
Returns:
"""
if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return
user = await get_user(user_id)
# To ensure permissions are enforced properly all datasets will have their own databases
dataset_database = await get_or_create_dataset_database(dataset, user)
# TODO: Find better location for database files
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, f".cognee_system/databases/{user.id}")
).resolve()
)
# Set vector and graph database configuration based on dataset database information
vector_config = {
"vector_db_url": os.path.join(cognee_directory_path, dataset_database.vector_database_name),
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": os.path.join(
cognee_directory_path, dataset_database.graph_database_name
),
}
# Use ContextVar to ensure these graph and vector configurations are used
# in the current async context across Cognee
graph_db_config.set(graph_config)
vector_db_config.set(vector_config)
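The comment above relies on `contextvars` semantics: every asyncio task runs in a copy of the current context, so a configuration set inside one task is invisible to its siblings. A self-contained sketch of that behaviour (names and paths are illustrative only):

```python
import asyncio
from contextvars import ContextVar

db_config: ContextVar = ContextVar("db_config", default=None)

async def run_for_dataset(name: str) -> str:
    # Each task sets its own configuration without affecting the other task.
    db_config.set({"graph_file_path": f"/tmp/{name}.pkl"})
    await asyncio.sleep(0)  # yield so the tasks interleave
    return f"{name} -> {db_config.get()['graph_file_path']}"

async def main():
    print(await asyncio.gather(run_for_dataset("dataset_a"), run_for_dataset("dataset_b")))

asyncio.run(main())
```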

View file

@ -8,7 +8,7 @@ from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.shared.utils import send_telemetry
from cognee.tasks.documents import (
check_permissions_on_documents,
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,7 +31,7 @@ async def get_cascade_graph_tasks(
cognee_config = get_cognify_config()
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user=user, permissions=["write"]),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
), # Extract text chunks based on the document type.

View file

@ -31,6 +31,9 @@ class CogneeApiError(Exception):
super().__init__(self.message, self.name)
def __str__(self):
return f"{self.name}: {self.message} (Status code: {self.status_code})"
class ServiceError(CogneeApiError):
"""Failures in external services or APIs, like a database or a third-party service"""

View file

@ -105,3 +105,14 @@ def get_graph_config():
- GraphConfig: A GraphConfig instance containing the graph configuration settings.
"""
return GraphConfig()
def get_graph_context_config():
"""This function will get the appropriate graph db config based on async context.
This allows the use of multiple graph databases for different threads, async tasks and parallelization
"""
from cognee.context_global_variables import graph_db_config
if graph_db_config.get():
return graph_db_config.get()
return get_graph_config().to_hashable_dict()

View file

@ -2,36 +2,22 @@
from functools import lru_cache
from .config import get_graph_config
from .config import get_graph_context_config
from .graph_db_interface import GraphDBInterface
from .supported_databases import supported_databases
async def get_graph_engine() -> GraphDBInterface:
"""
Factory function to get the appropriate graph client based on the graph type.
"""Factory function to get the appropriate graph client based on the graph type."""
# Get appropriate graph configuration based on current async context
config = get_graph_context_config()
This function retrieves the graph configuration and creates a graph engine by calling
the `create_graph_engine` function. If the configured graph database provider is
'networkx', it ensures that the graph is loaded from a file asynchronously if it hasn't
been loaded yet. It raises an `EnvironmentError` if the necessary configurations for the
selected graph provider are missing.
Returns:
--------
- GraphDBInterface: Returns an instance of GraphDBInterface which represents the
selected graph client.
"""
config = get_graph_config()
graph_client = create_graph_engine(**get_graph_config().to_hashable_dict())
graph_client = create_graph_engine(**config)
# Async functions can't be cached. After creating and caching the graph engine
# handle all necessary async operations for different graph types below.
# Handle loading of graph for NetworkX
if config.graph_database_provider.lower() == "networkx" and graph_client.graph is None:
if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
await graph_client.load_graph_from_file()
return graph_client
@ -40,11 +26,11 @@ async def get_graph_engine() -> GraphDBInterface:
@lru_cache
def create_graph_engine(
graph_database_provider,
graph_database_url,
graph_database_username,
graph_database_password,
graph_database_port,
graph_file_path,
graph_database_url="",
graph_database_username="",
graph_database_password="",
graph_database_port="",
):
"""
Create a graph engine based on the specified provider type.

View file

@ -0,0 +1 @@
from .get_or_create_dataset_database import get_or_create_dataset_database

View file

@ -0,0 +1,68 @@
from uuid import UUID
from typing import Union
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from cognee.modules.data.methods import create_dataset
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.users.models import DatasetDatabase
from cognee.modules.users.models import User
async def get_or_create_dataset_database(
dataset: Union[str, UUID],
user: User,
) -> DatasetDatabase:
"""
Return the `DatasetDatabase` row for the given owner + dataset.
If the row already exists, it is fetched and returned.
Otherwise a new one is created atomically and returned.
Parameters
----------
user : User
Principal that owns this dataset.
dataset : Union[str, UUID]
Dataset being linked.
"""
db_engine = get_relational_engine()
dataset_id = await get_unique_dataset_id(dataset, user)
vector_db_name = f"{dataset_id}.lance.db"
graph_db_name = f"{dataset_id}.pkl"
async with db_engine.get_async_session() as session:
# Create dataset if it doesn't exist
if isinstance(dataset, str):
dataset = await create_dataset(dataset, user, session)
# Try to fetch an existing row first
stmt = select(DatasetDatabase).where(
DatasetDatabase.owner_id == user.id,
DatasetDatabase.dataset_id == dataset_id,
)
existing: DatasetDatabase = await session.scalar(stmt)
if existing:
return existing
# If there are no existing rows build a new row
record = DatasetDatabase(
owner_id=user.id,
dataset_id=dataset_id,
vector_database_name=vector_db_name,
graph_database_name=graph_db_name,
)
try:
session.add(record)
await session.commit()
await session.refresh(record)
return record
except IntegrityError:
await session.rollback()
raise
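A usage sketch, assuming the relational tables and a default user already exist (for example after a prior pipeline run); the helper is idempotent per owner + dataset:

```python
import asyncio

from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_default_user

async def main():
    user = await get_default_user()
    first = await get_or_create_dataset_database("main_dataset", user)
    second = await get_or_create_dataset_database("main_dataset", user)
    # The second call fetches the same row instead of creating a new one.
    assert first.vector_database_name == second.vector_database_name
    print(first.vector_database_name, first.graph_database_name)

asyncio.run(main())
```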

View file

@ -62,3 +62,12 @@ def get_vectordb_config():
configuration.
"""
return VectorConfig()
def get_vectordb_context_config():
"""This function will get the appropriate vector db config based on async context."""
from cognee.context_global_variables import vector_db_config
if vector_db_config.get():
return vector_db_config.get()
return get_vectordb_config().to_dict()

View file

@ -6,10 +6,10 @@ from functools import lru_cache
@lru_cache
def create_vector_engine(
vector_db_url: str,
vector_db_port: str,
vector_db_key: str,
vector_db_provider: str,
vector_db_url: str,
vector_db_port: str = "",
vector_db_key: str = "",
):
"""
Create a vector database engine based on the specified provider.

View file

@ -1,14 +1,7 @@
from .config import get_vectordb_config
from .config import get_vectordb_context_config
from .create_vector_engine import create_vector_engine
def get_vector_engine():
"""
Create and return a vector engine instance.
Returns:
--------
A vector engine instance created from the vector database configuration.
"""
return create_vector_engine(**get_vectordb_config().to_dict())
# Get appropriate vector db configuration based on current async context
return create_vector_engine(**get_vectordb_context_config())

View file

@ -1,18 +1,13 @@
from ..get_vector_engine import get_vector_engine, get_vectordb_config
from ..get_vector_engine import get_vector_engine, get_vectordb_context_config
from sqlalchemy import text
from cognee.context_global_variables import vector_db_config as context_vector_db_config
async def create_db_and_tables():
"""
Create the database and its associated tables if necessary.
This function checks the vector database provider configuration and, if it is set to
"pgvector", creates the necessary vector extension in the PostgreSQL database using an
asynchronous context manager.
"""
vector_config = get_vectordb_config()
# Get appropriate vector db configuration based on current async context
vector_config = get_vectordb_context_config()
vector_engine = get_vector_engine()
if vector_config.vector_db_provider == "pgvector":
if vector_config["vector_db_provider"] == "pgvector":
async with vector_engine.engine.begin() as connection:
await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))

View file

@ -1,3 +1,4 @@
import os
from typing import Dict, List, Optional
from qdrant_client import AsyncQdrantClient, models
@ -147,14 +148,15 @@ class QDrantAdapter(VectorDBInterface):
Returns:
--------
- AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
operations.
"""
is_prod = os.getenv("ENV", "").lower() == "prod"
if self.qdrant_path is not None:
return AsyncQdrantClient(path=self.qdrant_path, port=6333)
return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
elif self.url is not None:
return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)
return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)
return AsyncQdrantClient(location=":memory:")

View file

@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various data errors
from .exceptions import (
UnstructuredLibraryImportError,
UnauthorizedDataAccessError,
DatasetNotFoundError,
DatasetTypeError,
)

View file

@ -20,3 +20,23 @@ class UnauthorizedDataAccessError(CogneeApiError):
status_code=status.HTTP_401_UNAUTHORIZED,
):
super().__init__(message, name, status_code)
class DatasetNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Dataset not found.",
name: str = "DatasetNotFoundError",
status_code=status.HTTP_404_NOT_FOUND,
):
super().__init__(message, name, status_code)
class DatasetTypeError(CogneeApiError):
def __init__(
self,
message: str = "Dataset type not supported.",
name: str = "DatasetTypeError",
status_code=status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)

View file

@ -8,7 +8,15 @@ from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
from .get_unique_dataset_id import get_unique_dataset_id
from .get_authorized_existing_datasets import get_authorized_existing_datasets
from .get_dataset_ids import get_dataset_ids
# Delete
from .delete_dataset import delete_dataset
from .delete_data import delete_data
# Create
from .load_or_create_datasets import load_or_create_datasets
# Check
from .check_dataset_name import check_dataset_name

View file

@ -0,0 +1,3 @@
def check_dataset_name(dataset_name: str):
if "." in dataset_name or " " in dataset_name:
raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,4 +1,3 @@
from uuid import UUID, uuid5, NAMESPACE_OID
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy.orm import joinedload

View file

@ -0,0 +1,39 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.users.models import User
from cognee.modules.data.methods.get_dataset_ids import get_dataset_ids
from cognee.modules.users.permissions.methods import get_all_user_permission_datasets
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
async def get_authorized_existing_datasets(
datasets: Union[list[str], list[UUID]], permission_type: str, user: User
) -> list[Dataset]:
"""
Function returns a list of existing dataset objects user has access for based on datasets input.
Args:
datasets:
user:
Returns:
list of Dataset objects
"""
if datasets:
# Function handles transforming dataset input to dataset IDs (if possible)
dataset_ids = await get_dataset_ids(datasets, user)
# If dataset_ids are provided filter these datasets based on what user has permission for.
if dataset_ids:
existing_datasets = await get_specific_user_permission_datasets(
user.id, permission_type, dataset_ids
)
else:
existing_datasets = []
else:
# If no datasets are provided, work with all existing datasets user has permission for.
existing_datasets = await get_all_user_permission_datasets(user, permission_type)
return existing_datasets

View file

@ -0,0 +1,36 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.exceptions import DatasetTypeError
from cognee.modules.data.methods import get_datasets
async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user):
"""
Function returns dataset IDs necessary based on provided input.
It transforms raw strings into real dataset_ids with keeping write permissions in mind.
If a user wants to write to a dataset he is not the owner of it must be provided through UUID.
Args:
datasets:
pipeline_name:
user:
Returns: a list of write access dataset_ids if they exist
"""
if all(isinstance(dataset, UUID) for dataset in datasets):
# Return list of dataset UUIDs
dataset_ids = datasets
else:
# Convert list of dataset names to dataset UUID
if all(isinstance(dataset, str) for dataset in datasets):
# Get all datasets owned by the user (a dataset the user does not own must be provided as a UUID)
user_datasets = await get_datasets(user.id)
# Keep only the datasets whose names were mentioned
dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets]
else:
raise DatasetTypeError(
f"One or more of the provided dataset types is not handled: f{datasets}"
)
return dataset_ids

View file

@ -1,6 +1,9 @@
from uuid import UUID, uuid5, NAMESPACE_OID
from cognee.modules.users.models import User
from typing import Union
async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
if isinstance(dataset_name, UUID):
return dataset_name
return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
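Because the dataset UUID is a `uuid5` over the dataset name plus the owner's id, it can be recomputed without a database lookup; a small sketch with a placeholder user id:

```python
from uuid import NAMESPACE_OID, UUID, uuid5

user_id = UUID("11111111-1111-1111-1111-111111111111")  # placeholder user id

# The same dataset name and owner always yield the same dataset UUID.
first = uuid5(NAMESPACE_OID, f"main_dataset{user_id}")
second = uuid5(NAMESPACE_OID, f"main_dataset{user_id}")
assert first == second
```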

View file

@ -0,0 +1,42 @@
from typing import List, Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.data.exceptions import DatasetNotFoundError
async def load_or_create_datasets(
dataset_names: List[Union[str, UUID]], existing_datasets: List[Dataset], user
) -> List[Dataset]:
"""
Given a list of dataset identifiers (names or UUIDs), return Dataset instances:
- If an identifier matches an existing Dataset (by name or id), reuse it.
- Otherwise, create a new Dataset with a unique id. Note: the created Dataset is not persisted to the database.
"""
result: List[Dataset] = []
for identifier in dataset_names:
# Try to find a matching dataset in the existing list
# If no matching dataset is found return None
match = next(
(ds for ds in existing_datasets if ds.name == identifier or ds.id == identifier), None
)
if match:
result.append(match)
continue
# If the identifier is a UUID but nothing matched, that's an error
if isinstance(identifier, UUID):
raise DatasetNotFoundError(f"Dataset with given UUID does not exist: {identifier}")
# Otherwise, create a new Dataset instance
new_dataset = Dataset(
id=await get_unique_dataset_id(dataset_name=identifier, user=user),
name=identifier,
owner_id=user.id,
)
result.append(new_dataset)
return result

View file

@ -33,9 +33,6 @@ class Data(Base):
cascade="all, delete",
)
# New relationship for ACLs with cascade deletion
acls = relationship("ACL", back_populates="data", cascade="all, delete-orphan")
def to_json(self) -> dict:
return {
"id": str(self.id),

View file

@ -19,6 +19,8 @@ class Dataset(Base):
owner_id = Column(UUID, index=True)
acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan")
data: Mapped[List["Data"]] = relationship(
"Data",
secondary=DatasetData.__tablename__,

View file

@ -1,11 +1,9 @@
import asyncio
from typing import Union
from uuid import NAMESPACE_OID, uuid5
from uuid import NAMESPACE_OID, uuid5, UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.methods import get_datasets
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
@ -14,6 +12,13 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.modules.pipelines.operations import log_pipeline_run_initiated
from cognee.context_global_variables import set_database_global_context_variables
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.modules.data.methods import (
get_authorized_existing_datasets,
load_or_create_datasets,
check_dataset_name,
)
from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
@ -21,6 +26,10 @@ from cognee.infrastructure.databases.relational import (
from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
)
from cognee.context_global_variables import (
graph_db_config as context_graph_db_config,
vector_db_config as context_vector_db_config,
)
logger = get_logger("cognee.pipeline")
@ -30,10 +39,19 @@ update_status_lock = asyncio.Lock()
async def cognee_pipeline(
tasks: list[Task],
data=None,
datasets: Union[str, list[str]] = None,
datasets: Union[str, list[str], list[UUID]] = None,
user: User = None,
pipeline_name: str = "custom_pipeline",
vector_db_config: dict = None,
graph_db_config: dict = None,
):
# Note: These context variables allow different database configurations in Cognee
# per async task, thread, process, etc.
if vector_db_config:
context_vector_db_config.set(vector_db_config)
if graph_db_config:
context_graph_db_config.set(graph_db_config)
# Create tables for databases
await create_relational_db_and_tables()
await create_pgvector_db_and_tables()
@ -54,49 +72,35 @@ async def cognee_pipeline(
if user is None:
user = await get_default_user()
# Convert datasets to list in case it's a string
if isinstance(datasets, str):
# Convert datasets to list
if isinstance(datasets, str) or isinstance(datasets, UUID):
datasets = [datasets]
# If no datasets are provided, work with all existing datasets.
existing_datasets = await get_datasets(user.id)
# Get datasets user wants write permissions for (verify user has permissions if datasets are provided as well)
# NOTE: If a user wants to write to a dataset he does not own it must be provided through UUID
existing_datasets = await get_authorized_existing_datasets(datasets, "write", user)
if not datasets:
# Get datasets from database if none sent.
datasets = existing_datasets
else:
# If dataset is already in database, use it, otherwise create a new instance.
dataset_instances = []
# If dataset matches an existing Dataset (by name or id), reuse it. Otherwise, create a new Dataset.
datasets = await load_or_create_datasets(datasets, existing_datasets, user)
for dataset_name in datasets:
is_dataset_found = False
for existing_dataset in existing_datasets:
if (
existing_dataset.name == dataset_name
or str(existing_dataset.id) == dataset_name
):
dataset_instances.append(existing_dataset)
is_dataset_found = True
break
if not is_dataset_found:
dataset_instances.append(
Dataset(
id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
name=dataset_name,
owner_id=user.id,
)
)
datasets = dataset_instances
if not datasets:
raise DatasetNotFoundError("There are no datasets to work with.")
awaitables = []
for dataset in datasets:
awaitables.append(
run_pipeline(
dataset=dataset, user=user, tasks=tasks, data=data, pipeline_name=pipeline_name
dataset=dataset,
user=user,
tasks=tasks,
data=data,
pipeline_name=pipeline_name,
context={"dataset": dataset},
)
)
@ -109,9 +113,13 @@ async def run_pipeline(
tasks: list[Task],
data=None,
pipeline_name: str = "custom_pipeline",
context: dict = None,
):
check_dataset_name(dataset.name)
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
await set_database_global_context_variables(dataset.name, user.id)
# Ugly hack, but no easier way to do this.
if pipeline_name == "add_pipeline":
# Refresh the add pipeline status so data is added to a dataset.
@ -160,15 +168,10 @@ async def run_pipeline(
if not isinstance(task, Task):
raise ValueError(f"Task {task} is not an instance of Task")
pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name)
pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context=context)
pipeline_run_status = None
async for run_status in pipeline_run:
pipeline_run_status = run_status
return pipeline_run_status
def check_dataset_name(dataset_name: str) -> str:
if "." in dataset_name or " " in dataset_name:
raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,8 +1,11 @@
import os
import json
from typing import Callable, Optional, List, Type
import asyncio
from uuid import UUID
from typing import Callable, List, Optional, Type, Union
from cognee.context_global_variables import set_database_global_context_variables
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.engine.utils import parse_id
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
@ -21,24 +24,45 @@ from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageR
from cognee.modules.search.types import SearchType
from cognee.modules.storage.utils import JSONEncoder
from cognee.modules.users.models import User
from cognee.modules.users.permissions.methods import get_document_ids_for_user
from cognee.modules.data.models import Dataset
from cognee.shared.utils import send_telemetry
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from cognee.modules.search.operations import log_query, log_result
async def search(
query_text: str,
query_type: SearchType,
datasets: list[str],
dataset_ids: Union[list[UUID], None],
user: User,
system_prompt_path="answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
):
"""
Args:
query_text:
query_type:
datasets:
user:
system_prompt_path:
top_k:
Returns:
Notes:
Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
"""
# Use search function filtered by permissions if access control is enabled
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return await permissions_search(
query_text, query_type, user, dataset_ids, system_prompt_path, top_k
)
query = await log_query(query_text, query_type.value, user.id)
own_document_ids = await get_document_ids_for_user(user.id, datasets)
search_results = await specific_search(
query_type,
query_text,
@ -49,18 +73,9 @@ async def search(
node_name=node_name,
)
filtered_search_results = []
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
for search_result in search_results:
document_id = search_result["document_id"] if "document_id" in search_result else None
document_id = parse_id(document_id)
if document_id is None or document_id in own_document_ids:
filtered_search_results.append(search_result)
await log_result(query.id, json.dumps(filtered_search_results, cls=JSONEncoder), user.id)
return filtered_search_results
return search_results
async def specific_search(
@ -120,3 +135,62 @@ async def specific_search(
send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
return results
async def permissions_search(
query_text: str,
query_type: SearchType,
user: User = None,
dataset_ids: Optional[list[UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
) -> list:
"""
Verify access to the provided datasets (or fall back to all datasets the user has read access to) and perform the search per dataset.
Not to be used outside of active access control mode.
"""
query = await log_query(query_text, query_type.value, user.id)
# Find datasets the user has read access to (if dataset_ids are provided, only those are returned, provided the user can read them)
search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
# Searches all provided datasets and handles setting up of appropriate database context based on permissions
search_results = await specific_search_by_context(
search_datasets, query_text, query_type, user, system_prompt_path, top_k
)
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
return search_results
async def specific_search_by_context(
search_datasets: list[Dataset],
query_text: str,
query_type: SearchType,
user: User,
system_prompt_path: str,
top_k: int,
):
"""
Searches all provided datasets and handles setting up of appropriate database context based on permissions.
Not to be used outside of active access control mode.
"""
async def _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k):
# Set database configuration in async context for each dataset user has access for
await set_database_global_context_variables(dataset.id, dataset.owner_id)
search_results = await specific_search(
query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
)
return {dataset.name: search_results}
# Search every dataset async based on query and appropriate database configuration
tasks = []
for dataset in search_datasets:
tasks.append(
_search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k)
)
return await asyncio.gather(*tasks)
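In access-control mode the gather above yields one `{dataset_name: results}` mapping per searched dataset. A hypothetical post-processing sketch showing how a caller could merge them:

```python
# Shape returned by permissions_search (illustrative values only).
per_dataset_results = [
    {"dataset_a": ["result 1", "result 2"]},
    {"dataset_b": ["result 3"]},
]

merged = {name: results for entry in per_dataset_results for name, results in entry.items()}
print(merged)  # {'dataset_a': ['result 1', 'result 2'], 'dataset_b': ['result 3']}
```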

View file

@ -19,17 +19,14 @@ class CustomJWTStrategy(JWTStrategy):
# JoinLoad tenant and role information to user object
user = await get_user(user.id)
if user.tenant:
data = {"user_id": str(user.id), "tenant_id": str(user.tenant.id), "roles": user.roles}
else:
# The default tenant is None
data = {"user_id": str(user.id), "tenant_id": None, "roles": user.roles}
data = {"user_id": str(user.id)}
return generate_jwt(data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm)
@lru_cache
def get_auth_backend():
bearer_transport = BearerTransport(tokenUrl="auth/jwt/login")
bearer_transport = BearerTransport(tokenUrl="api/v1/auth/login")
def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]:
secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")

View file

@ -9,4 +9,5 @@ from .exceptions import (
UserNotFoundError,
PermissionDeniedError,
TenantNotFoundError,
PermissionNotFoundError,
)

View file

@ -46,3 +46,13 @@ class PermissionDeniedError(CogneeApiError):
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)
class PermissionNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Permission type does not exist.",
name: str = "PermissionNotFoundError",
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)

View file

@ -1,7 +1,8 @@
from types import SimpleNamespace
from ..get_fastapi_users import get_fastapi_users
from fastapi import HTTPException, Header
from fastapi import HTTPException, Security
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import os
import jwt
@ -9,28 +10,29 @@ from uuid import UUID
fastapi_users = get_fastapi_users()
# Lets Swagger understand the authorization type and enables signing in from the Swagger docs to test the backend
bearer_scheme = HTTPBearer(scheme_name="BearerAuth", description="Paste **Bearer &lt;JWT&gt;**")
async def get_authenticated_user(authorization: str = Header(...)) -> SimpleNamespace:
"""Extract and validate JWT from Authorization header."""
async def get_authenticated_user(
creds: HTTPAuthorizationCredentials = Security(bearer_scheme),
) -> SimpleNamespace:
"""
Extract and validate the JWT presented in the Authorization header.
"""
if creds is None: # header missing
raise HTTPException(status_code=401, detail="Not authenticated")
if creds.scheme.lower() != "bearer": # shouldn't happen extra guard
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
token = creds.credentials
try:
scheme, token = authorization.split()
if scheme.lower() != "bearer":
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
payload = jwt.decode(
token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"]
)
if payload.get("tenant_id"):
# SimpleNamespace lets us access dictionary elements like attributes
auth_data = SimpleNamespace(
id=UUID(payload["user_id"]),
tenant_id=UUID(payload["tenant_id"]),
roles=payload["roles"],
)
else:
auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])
auth_data = SimpleNamespace(id=UUID(payload["user_id"]))
return auth_data
except jwt.ExpiredSignatureError:
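A round-trip sketch of the slimmed-down token contents using PyJWT: only `user_id` is embedded now, and tenant/role details are resolved server-side via `get_user`. The secret default mirrors the code above; use a real secret in practice:

```python
import os
from uuid import uuid4

import jwt  # PyJWT

secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")
token = jwt.encode({"user_id": str(uuid4())}, secret, algorithm="HS256")
print(jwt.decode(token, secret, algorithms=["HS256"]))  # {'user_id': '...'}
```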

View file

@ -1,5 +1,6 @@
from types import SimpleNamespace
from sqlalchemy.orm import selectinload
from sqlalchemy.exc import NoResultFound
from sqlalchemy.future import select
from cognee.modules.users.models import User
from cognee.base_config import get_base_config
@ -33,5 +34,6 @@ async def get_default_user() -> SimpleNamespace:
except Exception as error:
if "principals" in str(error.args):
raise DatabaseNotCreatedError() from error
raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
if isinstance(error, NoResultFound):
raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
raise

View file

@ -1,7 +1,9 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
import sqlalchemy.exc
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.infrastructure.databases.exceptions import EntityNotFoundError
from ..models import User
@ -12,9 +14,12 @@ async def get_user(user_id: UUID):
user = (
await session.execute(
select(User)
.options(joinedload(User.roles), joinedload(User.tenant))
.options(selectinload(User.roles), selectinload(User.tenant))
.where(User.id == user_id)
)
).scalar()
if not user:
raise EntityNotFoundError(message=f"Could not find user: {user_id}")
return user

View file

@ -15,8 +15,8 @@ class ACL(Base):
principal_id = Column(UUID, ForeignKey("principals.id"))
permission_id = Column(UUID, ForeignKey("permissions.id"))
data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE"))
dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"))
principal = relationship("Principal")
permission = relationship("Permission")
data = relationship("Data", back_populates="acls")
dataset = relationship("Dataset", back_populates="acls")

View file

@ -0,0 +1,19 @@
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
from cognee.infrastructure.databases.relational import Base
class DatasetDatabase(Base):
__tablename__ = "dataset_database"
owner_id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), index=True)
dataset_id = Column(
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
)
vector_database_name = Column(String, unique=True, nullable=False)
graph_database_name = Column(String, unique=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))

View file

@ -11,6 +11,8 @@ class Tenant(Principal):
id = Column(UUID, ForeignKey("principals.id"), primary_key=True)
name = Column(String, unique=True, nullable=False, index=True)
owner_id = Column(UUID, index=True)
# One-to-Many relationship with User; specify the join via User.tenant_id
users = relationship(
"User",

View file

@ -1,6 +1,7 @@
from .User import User
from .Role import Role
from .UserRole import UserRole
from .DatasetDatabase import DatasetDatabase
from .RoleDefaultPermissions import RoleDefaultPermissions
from .UserDefaultPermissions import UserDefaultPermissions
from .TenantDefaultPermissions import TenantDefaultPermissions

View file

@ -0,0 +1 @@
from .permission_types import PERMISSION_TYPES

View file

@ -1,6 +1,13 @@
from .check_permission_on_documents import check_permission_on_documents
from .give_permission_on_document import give_permission_on_document
from .get_role import get_role
from .get_tenant import get_tenant
from .get_principal import get_principal
from .get_principal_datasets import get_principal_datasets
from .get_all_user_permission_datasets import get_all_user_permission_datasets
from .get_specific_user_permission_datasets import get_specific_user_permission_datasets
from .check_permission_on_dataset import check_permission_on_dataset
from .give_permission_on_dataset import give_permission_on_dataset
from .get_document_ids_for_user import get_document_ids_for_user
from .authorized_give_permission_on_datasets import authorized_give_permission_on_datasets
from .give_default_permission_to_tenant import give_default_permission_to_tenant
from .give_default_permission_to_role import give_default_permission_to_role
from .give_default_permission_to_user import give_default_permission_to_user

View file

@ -0,0 +1,23 @@
from typing import Union, List
from cognee.modules.users.permissions.methods import get_principal
from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from uuid import UUID
async def authorized_give_permission_on_datasets(
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
):
# If only a single dataset UUID is provided transform it to a list
if not isinstance(dataset_ids, list):
dataset_ids = [dataset_ids]
principal = await get_principal(principal_id)
# Check if request owner has permission to share dataset access
datasets = await get_specific_user_permission_datasets(owner_id, "share", dataset_ids)
# TODO: Do we want to enforce sharing of datasets to only be between users of the same tenant?
for dataset in datasets:
await give_permission_on_dataset(principal, dataset.id, permission_name)

View file

@ -13,29 +13,29 @@ from ...models.ACL import ACL
logger = get_logger()
async def check_permission_on_documents(user: User, permission_type: str, document_ids: list[UUID]):
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
if user is None:
user = await get_default_user()
# TODO: Enable user role permissions again. Temporarily disabled during rework.
# # TODO: Enable user role permissions again. Temporarily disabled during rework.
# user_roles_ids = [role.id for role in user.roles]
user_roles_ids = []
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
# If dataset id was returned it means the user has permission to access it
result = await session.execute(
select(ACL)
.join(ACL.permission)
.options(joinedload(ACL.data))
.options(joinedload(ACL.dataset))
.where(ACL.principal_id.in_([user.id, *user_roles_ids]))
.where(ACL.permission.has(name=permission_type))
)
acls = result.unique().scalars().all()
data_ids = [acl.data.id for acl in acls]
has_permissions = all(document_id in data_ids for document_id in document_ids)
has_permission = dataset_id in [acl.dataset.id for acl in acls]
if not has_permissions:
if not has_permission:
raise PermissionDeniedError(
message=f"User {user.id} does not have {permission_type} permission on documents"
)

View file

@ -0,0 +1,31 @@
from cognee.shared.logging_utils import get_logger
from ...models.User import User
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods import get_principal_datasets
from cognee.modules.users.permissions.methods import get_role, get_tenant
logger = get_logger()
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
datasets = list()
# Get all datasets User has explicit access to
datasets.extend(await get_principal_datasets(user, permission_type))
if user.tenant_id:
# Get all datasets all tenants have access to
tenant = await get_tenant(user.tenant_id)
datasets.extend(await get_principal_datasets(tenant, permission_type))
# Get all datasets Users roles have access to
for role_name in user.roles:
role = await get_role(user.tenant_id, role_name)
datasets.extend(await get_principal_datasets(role, permission_type))
# Deduplicate datasets with same ID
unique = {}
for dataset in datasets:
# If the dataset id key already exists, leave the dictionary unchanged.
unique.setdefault(dataset.id, dataset)
return list(unique.values())

View file

@ -1,7 +1,9 @@
from uuid import UUID
from cognee.modules.data.methods import get_dataset_data
from sqlalchemy import select
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset, DatasetData, Data
from cognee.modules.data.models import Dataset, DatasetData
from ...models import ACL, Permission
@ -10,10 +12,10 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
async with db_engine.get_async_session() as session:
async with session.begin():
document_ids = (
dataset_ids = (
await session.scalars(
select(Data.id)
.join(ACL.data)
select(Dataset.id)
.join(ACL.dataset)
.join(ACL.permission)
.where(
ACL.principal_id == user_id,
@ -22,9 +24,15 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
)
).all()
# Get documents from datasets user has read access for
document_ids = []
for dataset_id in dataset_ids:
data_list = await get_dataset_data(dataset_id)
document_ids.extend([data.id for data in data_list])
if datasets:
documents_ids_in_dataset = set()
# If datasets are specified filter out documents that aren't part of the specified datasets
documents_ids_in_dataset = set()
for dataset in datasets:
# Find dataset id for dataset element
dataset_id = (

View file

@ -0,0 +1,14 @@
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
async def get_principal(principal_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
result = await session.execute(select(Principal).where(Principal.id == principal_id))
principal = result.unique().scalar_one()
return principal

View file

@ -0,0 +1,24 @@
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
from cognee.modules.data.models.Dataset import Dataset
from ...models.ACL import ACL
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
# If dataset id was returned it means the principal has permission to access it
result = await session.execute(
select(ACL)
.join(ACL.permission)
.options(joinedload(ACL.dataset))
.where(ACL.principal_id == principal.id)
.where(ACL.permission.has(name=permission_type))
)
acls = result.unique().scalars().all()
return [acl.dataset for acl in acls]

View file

@ -0,0 +1,24 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import RoleNotFoundError
from ...models.Role import Role
async def get_role(tenant_id: UUID, role_name: str):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(
select(Role).where(Role.name == role_name).where(Role.tenant_id == tenant_id)
)
role = result.unique().scalar_one()
if not role:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")
return role
except sqlalchemy.exc.NoResultFound:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")

View file

@ -0,0 +1,46 @@
from uuid import UUID
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
get_all_user_permission_datasets,
)
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.methods import get_user
async def get_specific_user_permission_datasets(
user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
) -> list[Dataset]:
"""
Return a list of datasets the user has the given permission for. If a list of dataset ids is
provided, verify which of those datasets the user has the appropriate permission for and return
only those.
Args:
user_id: UUID of the user whose permissions are checked.
permission_type: Name of the permission to check (e.g. "read" or "write").
dataset_ids: Optional list of dataset UUIDs to restrict the check to.
Returns:
list[Dataset]: List of datasets the user has the given permission for.
"""
user = await get_user(user_id)
# Find all datasets user has permission for
user_permission_access_datasets = await get_all_user_permission_datasets(user, permission_type)
# If specific datasets are provided, keep only the requested ones
if dataset_ids:
search_datasets = [
dataset for dataset in user_permission_access_datasets if dataset.id in dataset_ids
]
# If the user lacks the permission on any of the requested datasets, raise an error
if len(search_datasets) != len(dataset_ids):
raise PermissionDeniedError(
f"Request owner does not have necessary permission: [{permission_type}] for all datasets requested."
)
else:
search_datasets = user_permission_access_datasets
if len(search_datasets) == 0:
raise PermissionDeniedError(
f"Request owner does not have permission: [{permission_type}] for any dataset."
)
return search_datasets
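A hedged usage sketch for this helper; the user id and dataset ids are placeholders, and PermissionDeniedError is the exception imported above:

# Hypothetical sketch (inside an async context): verify "write" access to two
# specific datasets before running a pipeline against them.
try:
    writable = await get_specific_user_permission_datasets(
        user_id, "write", [dataset_id_1, dataset_id_2]
    )
except PermissionDeniedError:
    # Raised when the user lacks "write" on at least one requested dataset,
    # or on every dataset when no ids are passed.
    writable = []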

View file

@ -0,0 +1,21 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import TenantNotFoundError
from ...models.Tenant import Tenant
async def get_tenant(tenant_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.unique().scalar_one()
if not tenant:
raise TenantNotFoundError
return tenant
except sqlalchemy.exc.NoResultFound:
raise TenantNotFoundError(message=f"Could not find tenant: {tenant_id}")

View file

@ -0,0 +1,46 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import Principal, ACL, Permission
from uuid import UUID
from cognee.modules.users.permissions import PERMISSION_TYPES
from cognee.modules.users.exceptions import PermissionNotFoundError
async def give_permission_on_dataset(
principal: Principal,
dataset_id: UUID,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission_name not in PERMISSION_TYPES:
# If permission is not in allowed permission types
raise PermissionNotFoundError(
message=f"{permission_name} not found or not in allowed permission types"
)
elif permission is None:
permission = Permission(name=permission_name)
existing_acl = None
else:
# Check if the ACL entry already exists to avoid duplicates
existing_acl = await session.execute(
select(ACL).filter(
ACL.principal_id == principal.id,
ACL.dataset_id == dataset_id,
ACL.permission_id == permission.id,
)
)
existing_acl = existing_acl.scalars().first()
# If no existing ACL entry is found, proceed to add a new one
if existing_acl is None:
acl = ACL(principal_id=principal.id, dataset_id=dataset_id, permission=permission)
session.add(acl)
await session.commit()
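For reference, a small sketch of issuing grants with this helper; the `user` and `dataset` objects are assumed to already exist, and the same pattern is used by ingest_data later in this PR:

# Hypothetical sketch: give the dataset creator the full set of permissions.
for permission_name in ("read", "write", "delete", "share"):
    await give_permission_on_dataset(user, dataset.id, permission_name)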

View file

@ -1,27 +0,0 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import User, ACL, Permission
async def give_permission_on_document(
user: User,
document_id: str,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission is None:
permission = Permission(name=permission_name)
acl = ACL(principal_id=user.id, data_id=document_id, permission=permission)
session.add(acl)
await session.commit()

View file

@ -0,0 +1 @@
PERMISSION_TYPES = ["read", "write", "delete", "share"]

View file

@ -9,24 +9,40 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import (
UserNotFoundError,
RoleNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
)
from cognee.modules.users.models import (
User,
Role,
Tenant,
UserRole,
)
async def add_user_to_role(user_id: UUID, role_id: UUID):
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
role = (await session.execute(select(Role).where(Role.id == role_id))).scalars().first()
if not user:
raise UserNotFoundError
elif not role:
raise RoleNotFoundError
# Only look up the tenant once the role is known to exist
tenant = (
(await session.execute(select(Tenant).where(Tenant.id == role.tenant_id)))
.scalars()
.first()
)
if user.tenant_id != role.tenant_id:
raise TenantNotFoundError(
message="User tenant does not match role tenant. User cannot be added to role."
)
elif tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="User submitting request does not have permission to add user to role."
)
try:
# Add association directly to the association table

View file

@ -4,6 +4,9 @@ from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.models import (
Role,
)
@ -11,13 +14,21 @@ from cognee.modules.users.models import (
async def create_role(
role_name: str,
tenant_id: UUID,
owner_id: UUID,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(owner_id)
tenant = await get_tenant(user.tenant_id)
if owner_id != tenant.owner_id:
raise PermissionDeniedError(
"User submitting request does not have permission to create role for tenant."
)
try:
# Add association directly to the association table
role = Role(name=role_name, tenant_id=tenant_id)
role = Role(name=role_name, tenant_id=tenant.id)
session.add(role)
except IntegrityError:
raise EntityAlreadyExistsError(message="Role already exists for tenant.")

View file

@ -1 +1,2 @@
from .create_tenant import create_tenant
from .add_user_to_tenant import add_user_to_tenant

View file

@ -0,0 +1,44 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import (
UserNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
)
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(user_id)
tenant = await get_tenant(tenant_id)
if not user:
raise UserNotFoundError
elif not tenant:
raise TenantNotFoundError
if tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="Only tenant owner can add other users to organization."
)
try:
if user.tenant_id is None:
user.tenant_id = tenant_id
elif user.tenant_id == tenant_id:
return
else:
# sqlalchemy's IntegrityError can't be raised without arguments here, so signal
# the conflict with the domain error directly.
raise EntityAlreadyExistsError(
message="User is already part of a tenant. Only one tenant can be assigned to user."
)
await session.merge(user)
await session.commit()
except IntegrityError:
raise EntityAlreadyExistsError(
message="User is already part of a tenant. Only one tenant can be assigned to user."
)

View file

@ -1,19 +1,28 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.models import Tenant
from cognee.modules.users.methods import get_user
async def create_tenant(tenant_name: str):
async def create_tenant(tenant_name: str, user_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
# Add association directly to the association table
tenant = Tenant(name=tenant_name)
user = await get_user(user_id)
if user.tenant_id:
raise EntityAlreadyExistsError(
message="User already has a tenant. New tenant cannot be created."
)
tenant = Tenant(name=tenant_name, owner_id=user_id)
session.add(tenant)
await session.flush()
user.tenant_id = tenant.id
await session.merge(user)
await session.commit()
except IntegrityError:
raise EntityAlreadyExistsError(message="Tenant already exists.")
await session.commit()
await session.refresh(tenant)
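Taken together, the tenant and role helpers support a flow along these lines; this is a sketch that assumes `owner` and `member` are existing User instances and re-reads objects where the in-memory copies may be stale:

# Hypothetical end-to-end sketch of the tenant/role flow (inside an async context).
await create_tenant("acme", owner.id)  # owner becomes the tenant owner
owner = await get_user(owner.id)  # refresh to pick up the new tenant_id
tenant = await get_tenant(owner.tenant_id)
await add_user_to_tenant(member.id, tenant.id, owner.id)  # only the tenant owner may do this
await create_role("engineering", tenant.id, owner.id)
role = await get_role(tenant.id, "engineering")
await add_user_to_role(member.id, role.id, owner.id)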

View file

@ -2,4 +2,4 @@ from .translate_text import translate_text
from .detect_language import detect_language
from .classify_documents import classify_documents
from .extract_chunks_from_documents import extract_chunks_from_documents
from .check_permissions_on_documents import check_permissions_on_documents
from .check_permissions_on_dataset import check_permissions_on_dataset

View file

@ -1,10 +1,10 @@
from cognee.modules.data.processing.document_types import Document
from cognee.modules.users.permissions.methods import check_permission_on_documents
from cognee.modules.users.permissions.methods import check_permission_on_dataset
from typing import List
async def check_permissions_on_documents(
documents: list[Document], user, permissions
async def check_permissions_on_dataset(
documents: List[Document], context: dict, user, permissions
) -> List[Document]:
"""
Validates a user's permissions on a list of documents.
@ -14,13 +14,12 @@ async def check_permissions_on_documents(
- It is designed to validate multiple permissions in a sequential manner for the same set of documents.
- Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
"""
document_ids = [document.id for document in documents]
for permission in permissions:
await check_permission_on_documents(
await check_permission_on_dataset(
user,
permission,
document_ids,
context["dataset"].id,
)
return documents
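A hedged sketch of how this task might be invoked; the documents, dataset, and user are placeholders, and the context dict mirrors the `context["dataset"]` access above:

# Hypothetical sketch: enforce read permission on the dataset a batch of
# documents belongs to before continuing the pipeline.
documents = await check_permissions_on_dataset(
    documents=documents,
    context={"dataset": dataset},
    user=user,
    permissions=["read"],
)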

View file

@ -2,6 +2,7 @@ import dlt
import s3fs
import json
import inspect
from uuid import UUID
from typing import Union, BinaryIO, Any, List, Optional
import cognee.modules.ingestion as ingestion
from cognee.infrastructure.databases.relational import get_relational_engine
@ -9,7 +10,8 @@ from cognee.modules.data.methods import create_dataset, get_dataset_data, get_da
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.models.DatasetData import DatasetData
from cognee.modules.users.models import User
from cognee.modules.users.permissions.methods import give_permission_on_document
from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from .get_dlt_destination import get_dlt_destination
from .save_data_item_to_storage import save_data_item_to_storage
@ -18,7 +20,11 @@ from cognee.api.v1.add.config import get_s3_config
async def ingest_data(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
):
destination = get_dlt_destination()
@ -73,7 +79,11 @@ async def ingest_data(
}
async def store_data_to_dataset(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
):
if not isinstance(data, list):
# Convert data to a list as we work with lists further down.
@ -104,7 +114,17 @@ async def ingest_data(
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
dataset = await create_dataset(dataset_name, user, session)
if dataset_id:
# Retrieve existing dataset
dataset = await get_specific_user_permission_datasets(
user.id, "write", [dataset_id]
)
# get_specific_user_permission_datasets returns a list; unwrap the single Dataset
if isinstance(dataset, list):
dataset = dataset[0]
else:
# Create new one
dataset = await create_dataset(dataset_name, user, session)
# Check to see if data should be updated
data_point = (
@ -138,6 +158,7 @@ async def ingest_data(
node_set=json.dumps(node_set) if node_set else None,
token_count=-1,
)
session.add(data_point)
# Check if data is already in dataset
dataset_data = (
@ -150,17 +171,20 @@ async def ingest_data(
# If data is not present in dataset add it
if dataset_data is None:
dataset.data.append(data_point)
await session.merge(dataset)
await session.commit()
await give_permission_on_document(user, data_id, "read")
await give_permission_on_document(user, data_id, "write")
await give_permission_on_dataset(user, dataset.id, "read")
await give_permission_on_dataset(user, dataset.id, "write")
await give_permission_on_dataset(user, dataset.id, "delete")
await give_permission_on_dataset(user, dataset.id, "share")
return file_paths
db_engine = get_relational_engine()
file_paths = await store_data_to_dataset(data, dataset_name, user, node_set)
file_paths = await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)
# Note: DLT pipeline has its own event loop, therefore objects created in another event loop
# can't be used inside the pipeline
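As a usage note, a hedged sketch of calling the updated entry point with the new dataset_id parameter; `existing_dataset_id` is a placeholder UUID for a dataset the user already holds "write" permission on:

# Hypothetical sketch: append new data to an existing dataset by id. When dataset_id
# is provided, the dataset is resolved through the user's "write" permission instead
# of being created from dataset_name.
await ingest_data(
    data=["example text"],
    dataset_name="my_dataset",
    user=user,
    dataset_id=existing_dataset_id,
)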

View file

@ -0,0 +1,71 @@
import asyncio
import os
import pathlib
import cognee
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
logger = get_logger()
async def main():
data_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_library")
).resolve()
)
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_library")
).resolve()
)
cognee.config.system_root_directory(cognee_directory_path)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
await cognee.add(["TEST1"], "test1")
await cognee.add(["TEST2"], "test2")
task_1_config = {
"vector_db_url": "cognee1.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_2_config = {
"vector_db_url": "cognee2.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_1_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu1.db",
}
task_2_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu2.db",
}
# schedule both cognify calls concurrently
task1 = asyncio.create_task(
cognee.cognify(
["test1"], vector_db_config=task_1_config, graph_db_config=task_1_graph_config
)
)
task2 = asyncio.create_task(
cognee.cognify(
["test2"], vector_db_config=task_2_config, graph_db_config=task_2_graph_config
)
)
# wait until both are done (raises first error if any)
await asyncio.gather(task1, task2)
if __name__ == "__main__":
asyncio.run(main(), debug=True)

View file

@ -144,7 +144,6 @@ async def main():
graph_completion = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION,
query_text=random_node_name,
datasets=[dataset_name_2],
)
assert len(graph_completion) != 0, "Completion result is empty."
print("Completion result is:")

View file

@ -49,7 +49,11 @@ async def main():
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
search_results = await vector_engine.search("Entity_name", "Quantum computer")
assert len(search_results) != 0, "The search results list is empty."
random_node = search_results[0]
random_node_name = random_node.payload["text"]
search_results = await cognee.search(

View file

@ -24,13 +24,9 @@ def mock_user():
@pytest.mark.asyncio
@patch.object(search_module, "log_query")
@patch.object(search_module, "log_result")
@patch.object(search_module, "get_document_ids_for_user")
@patch.object(search_module, "specific_search")
@patch.object(search_module, "parse_id")
async def test_search(
mock_parse_id,
mock_specific_search,
mock_get_document_ids,
mock_log_result,
mock_log_query,
mock_user,
@ -48,26 +44,19 @@ async def test_search(
# Mock document IDs
doc_id1 = uuid.uuid4()
doc_id2 = uuid.uuid4()
doc_id3 = uuid.uuid4() # This one will be filtered out
mock_get_document_ids.return_value = [doc_id1, doc_id2]
# Mock search results
search_results = [
{"document_id": str(doc_id1), "content": "Result 1"},
{"document_id": str(doc_id2), "content": "Result 2"},
{"document_id": str(doc_id3), "content": "Result 3"}, # Should be filtered out
]
mock_specific_search.return_value = search_results
# Mock parse_id to return the same UUID
mock_parse_id.side_effect = lambda x: uuid.UUID(x) if x else None
# Execute
results = await search(query_text, query_type, datasets, mock_user)
await search(query_text, query_type, datasets, mock_user)
# Verify
mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id)
mock_get_document_ids.assert_called_once_with(mock_user.id, datasets)
mock_specific_search.assert_called_once_with(
query_type,
query_text,
@ -78,11 +67,6 @@ async def test_search(
node_name=None,
)
# Only the first two results should be included (doc_id3 is filtered out)
assert len(results) == 2
assert results[0]["document_id"] == str(doc_id1)
assert results[1]["document_id"] == str(doc_id2)
# Verify result logging
mock_log_result.assert_called_once()
# Check that the first argument is the query ID

282
poetry.lock generated
View file

@ -435,7 +435,7 @@ description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.11\""
markers = "python_version == \"3.10\""
files = [
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@ -448,7 +448,7 @@ description = "Timeout context manager for asyncio programs"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" and python_version == \"3.11\""
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\" and extra == \"falkordb\""
files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@ -593,7 +593,7 @@ description = "Backport of CPython tarfile module"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"deepeval\" and python_version <= \"3.11\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\") and extra == \"deepeval\""
files = [
{file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
{file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
@ -1226,7 +1226,7 @@ description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
groups = ["main"]
markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")"
markers = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or platform_system == \"Windows\")"
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@ -2098,7 +2098,7 @@ description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.11\""
markers = "python_version == \"3.10\""
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@ -2225,7 +2225,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"},
{file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"},
@ -2974,7 +2974,7 @@ description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\")"
markers = "python_version == \"3.10\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\") or extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\""
files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
{file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"},
@ -3078,7 +3078,7 @@ description = "Protobuf code generator for gRPC"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"weaviate\" or python_version >= \"3.13\" and (extra == \"weaviate\" or extra == \"qdrant\")"
markers = "extra == \"weaviate\""
files = [
{file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"},
{file = "grpcio_tools-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6a722bba714392de2386569c40942566b83725fa5c5450b8910e3832a5379469"},
@ -3631,7 +3631,7 @@ description = "IPython: Productive Interactive Computing"
optional = true
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"notebook\" or extra == \"dev\")"
markers = "python_version == \"3.10\" and (extra == \"notebook\" or extra == \"dev\")"
files = [
{file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"},
{file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"},
@ -4454,50 +4454,50 @@ files = [
[[package]]
name = "kuzu"
version = "0.8.2"
version = "0.9.0"
description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"kuzu\""
markers = "extra == \"api\" or extra == \"kuzu\""
files = [
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:78bcdf6cc7b130bce8b307709e8d7bddd2e9104b2b696a9dc52574556e754570"},
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b42e3e9b1eacf830700287b05e96f9455b89dd4140085053e6c86b32c61e8d5c"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf06c602dc0231268d9cfa56a62afef15f8fca3be1ccd2cad22047a14bff4ae0"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50a873e7cd0c2e8e3093e9af14cffb14e49f1f67eceb32df3d0454ce101402d3"},
{file = "kuzu-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d36261444d31432606f3f3ed00624f1a3a8edcf7d830564c72b76ffbdf4d318"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c1694c6d1b19c46ad5d416cac429ccf1fe91aca4d367664e3aa0afa59800f93"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:00156c64523a1377ffced998bdb031709336f90543da69544c0ab4b40d533692"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc75f26afe8815b046cfb0d931303da6c36ce3afb49d4ae18a3899f23e62020f"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f0de6910724a74cc492354e903cf76db78b6353eef1e2edfa0b79d600c3c572"},
{file = "kuzu-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:56e99c39a725943aa7ad96ada8f29706da3d53cc98385f2c663b8ea026f0dce3"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adcc250b34963a6eea62b59d47a091018d83e61fb2e95552795ab61f103052be"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f72036924466143675980baed02a26c0fca15b6254c11de9a9c18d28fe66247e"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2fd7895fdfd9df880091d32bfb79c148f849659c67e2b9e185f952a6bde9139"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68486e291aa8a61264be7e31233ec34eeb6da2402f4b980c3f2b67f9ccbbea3a"},
{file = "kuzu-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:7cce7d06e6f09cd488c62be7cafe78752b037ed9e6585ed3da9df029104b1987"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa0495f856f2e5f5067e281dab3fbc170aba0721d1f56156a8cd9fa50e706f91"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:823577b472ba63c3b36e5ff81e2b744736f9eaf0b71585c247f3defc9d268f53"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bde76f38d293f49ad283a4831bd32d41f185b93a75d388d67f9b8996678203e9"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cdb189012613ecd26630096796e3817c260deea85782e764309cd36b2c39dac5"},
{file = "kuzu-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:71fb98721f9c46f960a5c3baea6b083026485c4b9a3e74ab01418243e29e3753"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e12726af2cb552ab7b60e2b4312469359bb3b4b45ddbcfb75220def4be6f566"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055f2cd9741bf39161f9ccff80428f8fb80b1910b2450b05bbe848487ba694f5"},
{file = "kuzu-0.8.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:18cb3da3a650f8dfde3639fbd6319a5ad6f98f60689c5dd96d20d8d1fc184d4c"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e55a8fddc21ac3e27b3cf2815d93264dd3c89e9ad8c7f3960d51bdfe48a02709"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d93600aceacdd7903aa39f016cb641811f96e4825b027a135aaaa1d82e23d24"},
{file = "kuzu-0.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:68601d9e741c7815c3d3f46a9c6884853388bcc6920945f069d5dc4f9492c9c5"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32d7ff56d793df27f76129b8b15bd85c940e59bcb67acd189b6a5ed1af5e8b44"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5e639f24be2fca78bf3890774f273aa1a6b149bfdbeb5c7e966e03b8f610be98"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1caf46e2721dabed94b65cdcf3990551af2f3913c3f2dcd39f3e5397f0134243"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5333c9e4557ccbfef7b822793ec382848411c8d11fdee063064b41bd1828404"},
{file = "kuzu-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:765a8bd4c5b9d24583eb8aaa20ecd753d78220138a82bf643ec592ffb8128298"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a215ff235d17a41c50d1cf2bd8e67a196eff32f23e59d989b1a40e6192f2008"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:074b5440186e4214b653d46f8d5a15d4b4cae1185d4656eaf598fe9b840fcdca"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32303a9533674a35e52d429f1446a82e2fc97c423618bc86aaafef1d4d2621e4"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0baea115bc55c8ed710f2beae8f02e46cf2bac42326b4e2c3acd25a76031f59d"},
{file = "kuzu-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:70e031131c5b8e327edd63993b05fb04196b74d0ade1baf0f4005968610310ed"},
{file = "kuzu-0.8.2.tar.gz", hash = "sha256:68ad72b3ef6a32a41ecfa955fa4ca9ca0c8a36d3a1bc13e34cc70c971b2b8ca7"},
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9f216d67c092ea52086c99cf4b1deabe0f8daaf47c80cf1892b3b41c57d58a"},
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bda6d845bf1c7da204ffa7730573118f2d43fe6b14b1a5d0d2845ec3d3481362"},
{file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab5b28f101c93899fc15668b6cb25f6db3d4a9844fcc4affed293caaaafaa4b7"},
{file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183bb1de19ffec1c3b07c0b4d5eecf02eb4eeafc1d50aea409bc91e1fad4d6d2"},
{file = "kuzu-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e36ce7da1bbebb538082656de18a717895d9352a33c8bcac170ef2fc22a4902"},
{file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82dd690d823df816e7826945e5243a4ae65e3e948ef512709a59205b84b9f6dd"},
{file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:c394e019a14e9c5636228cf1acd333997c31e5da3d9a60a1df2c03b828438432"},
{file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7d493f88ed31eada4b88a92b115bc6085c60498c47336ab06a489e75a727bab"},
{file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:171b47cf2b3923c813f1ed88fb9d3964a9355129b5d3ebca54eba3450bfc1f97"},
{file = "kuzu-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:3c8a8a611f599801c8db6aeffb978cd1badcfa3ec8f79c15b701810fee71765f"},
{file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:509af4029f9dcb9c3e843a825df44ec30009a70fad891cbcfb611c3b8cdfefd6"},
{file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:885f17f6e46c15ecef121fc57a941f8b60f0a5c1d3995813bb7a4c7437fb2259"},
{file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f2e35aa345b543a4a21de0e82b70eac4c753987cfa4ded75ae7f9f23edbf11"},
{file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67430c9813607a3b901c4a1e6bfb3b93538af230bc821e675c552a162818f589"},
{file = "kuzu-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:549f4a72f815554fb998582876c5875cb0917a192e6a58d196e8247fd8902701"},
{file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ec2e709599b4015d0a179a191dd7850e7bf076f83b37b70d0dc2e4ee59ce7725"},
{file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:8aad4fbd74b283ffb0b115138dfc62d9775c8f19ba62ab243e55e3cd648652b6"},
{file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba9dd4f412e31d34345b6461fc9489955ae9566abf426e56af478b6e791b735a"},
{file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340502cbce54f21a5b2440a75c28d61ddfd26d6d6848e9daa6140798bdd5b367"},
{file = "kuzu-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:e1ddb189dfa2aee0123dcd1a5ccc5b831a7f297233a09fccfd76294fc2f9e6bd"},
{file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fae68db87ba48268228c89e70ed1fde2f43843d8ed6b2debaafd314c45e8542"},
{file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0279ba37c639d96f303eb6ad4481e634495be31210991d8008c385ee50b4e0a"},
{file = "kuzu-0.9.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ca7424fe3831df687552b89903aa57fb88efff9c25df15c5d678fae7c933199"},
{file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bce9284913434661f47cecfc763f8997a61ebd2bb7bfe993970c1403924708fa"},
{file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:66040cdf9a59a5423b49c3d2bc01a089114b573ee1345d5a7c912276fbca0135"},
{file = "kuzu-0.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8e195774364123845df071eddb18873ce8c78244dd6f854badfe65053b058088"},
{file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2906f29ee36f9f642bdb8f5222c94f667092e38bde7dc53ebb252f9eb524ab6a"},
{file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4c3218e266766080fe1b31325d0156d1b334f62ae23dac854c3e4919115ef8c6"},
{file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a26214c1600c21f5e4aa96585706953a8792ad77e14788710d78f8af0d6b74ec"},
{file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b153fb28db9336757346eabb24b8c179b4ed48578a0ef158210fbc935df2184"},
{file = "kuzu-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:b6ee075e2571b11a434efb004cb0b3a2fbd7aa416ae680816869f1388e5fc734"},
{file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56874ae750ff99b15c959d884b175adf24ac912ab08e084c42784902b2bce2fb"},
{file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:6e0265b1ad445500397dc0df3cc4e7faddfd67fcd3d0952d9a4cdab6b77b47e9"},
{file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d66e69a3e135ea123cc7c9c2e507bbb614ffdbfe7be835782c6a588ae63ff900"},
{file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e11c8b7186798ad95563e1d7ebf84495d817c406bd28c21af7170467e37e35e"},
{file = "kuzu-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb80eb6c71b02c4e57e3570b079c494082f7ff819d4c06ac482914f29211294"},
{file = "kuzu-0.9.0.tar.gz", hash = "sha256:2e59f3d4d1fc385e9e90d7ae09f072ec2f4cfeff508582523a0034ceb076f6eb"},
]
[[package]]
@ -5087,7 +5087,7 @@ description = "Python logging made (stupidly) simple"
optional = true
python-versions = "<4.0,>=3.5"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"},
{file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"},
@ -5827,7 +5827,7 @@ description = "Python extension for MurmurHash (MurmurHash3), a set of fast and
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"},
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"},
@ -6437,6 +6437,7 @@ description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [
{file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
{file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@ -6476,6 +6477,69 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
[[package]]
name = "numpy"
version = "2.1.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version >= \"3.13\""
files = [
{file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
{file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
{file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
{file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
{file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
{file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
{file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
{file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
{file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
{file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
{file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
{file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
{file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
{file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
{file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
{file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
]
[[package]]
name = "oauthlib"
version = "3.2.2"
@ -6929,8 +6993,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -7028,7 +7092,7 @@ description = "Python datetimes made easy"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\""
markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [
{file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"},
{file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"},
@ -7713,7 +7777,7 @@ description = "Fast and parallel snowball stemmer"
optional = true
python-versions = "*"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"},
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"},
@ -8117,8 +8181,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.2", markers = "python_version < \"3.11\""},
{version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.3.7", markers = "python_version >= \"3.12\""},
{version = ">=0.3.6", markers = "python_version == \"3.11\""},
]
isort = ">=4.2.5,<5.13 || >5.13,<7"
mccabe = ">=0.6,<0.8"
@ -8861,41 +8925,15 @@ files = [
[[package]]
name = "qdrant-client"
version = "1.12.1"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"qdrant\""
files = [
{file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"},
{file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"},
]
[package.dependencies]
grpcio = ">=1.41.0"
grpcio-tools = ">=1.41.0"
httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = {version = ">=1.26", markers = "python_version >= \"3.12\""}
portalocker = ">=2.7.0,<3.0.0"
pydantic = ">=1.10.8"
urllib3 = ">=1.26.14,<3"
[package.extras]
fastembed = ["fastembed (==0.3.6) ; python_version < \"3.13\""]
fastembed-gpu = ["fastembed-gpu (==0.3.6) ; python_version < \"3.13\""]
[[package]]
name = "qdrant-client"
version = "1.14.1"
version = "1.14.2"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"qdrant\""
markers = "extra == \"qdrant\""
files = [
{file = "qdrant_client-1.14.1-py3-none-any.whl", hash = "sha256:1c4d5ed791873698da8b5df68df16bb203ec1b0cd6cec0fd6002572a06291a1b"},
{file = "qdrant_client-1.14.1.tar.gz", hash = "sha256:75352057ea59fdd7987313dc9cef4d83953591d083028d94eac99cd0e5e2f607"},
{file = "qdrant_client-1.14.2-py3-none-any.whl", hash = "sha256:7c283b1f0e71db9c21b85d898fb395791caca2a6d56ee751da96d797b001410c"},
{file = "qdrant_client-1.14.2.tar.gz", hash = "sha256:da5cab4d367d099d1330b6f30d45aefc8bd76f8b8f9d8fa5d4f813501b93af0d"},
]
[package.dependencies]
@ -8904,6 +8942,7 @@ httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = [
{version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
{version = ">=1.26", markers = "python_version == \"3.12\""},
{version = ">=2.1.0", markers = "python_version >= \"3.13\""},
]
portalocker = ">=2.7.0,<3.0.0"
protobuf = ">=3.20.0"
@ -10485,7 +10524,7 @@ description = "A lil' TOML parser"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
markers = "python_version == \"3.10\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@ -10981,7 +11020,7 @@ description = "A library that prepares raw documents for downstream ML tasks."
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "extra == \"docs\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"docs\""
files = [
{file = "unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337"},
{file = "unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6"},
@ -11039,6 +11078,71 @@ rtf = ["pypandoc"]
tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
name = "unstructured"
version = "0.17.2"
description = "A library that prepares raw documents for downstream ML tasks."
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"docs\""
files = [
{file = "unstructured-0.17.2-py3-none-any.whl", hash = "sha256:527dd26a4b273aebef2f9119c9d4f0d0ce17640038d92296d23abe89be123840"},
{file = "unstructured-0.17.2.tar.gz", hash = "sha256:af18c3caef0a6c562cf77e34ee8b6ff522b605031d2336ffe565df66f126aa46"},
]
[package.dependencies]
backoff = "*"
beautifulsoup4 = "*"
chardet = "*"
dataclasses-json = "*"
emoji = "*"
filetype = "*"
html5lib = "*"
langdetect = "*"
lxml = "*"
markdown = {version = "*", optional = true, markers = "extra == \"md\""}
networkx = {version = "*", optional = true, markers = "extra == \"xlsx\""}
nltk = "*"
numpy = "*"
openpyxl = {version = "*", optional = true, markers = "extra == \"xlsx\""}
pandas = {version = "*", optional = true, markers = "extra == \"csv\" or extra == \"tsv\" or extra == \"xlsx\""}
psutil = "*"
pypandoc = {version = "*", optional = true, markers = "extra == \"epub\" or extra == \"odt\" or extra == \"org\" or extra == \"rst\" or extra == \"rtf\""}
python-docx = {version = ">=1.1.2", optional = true, markers = "extra == \"doc\" or extra == \"docx\" or extra == \"odt\""}
python-iso639 = "*"
python-magic = "*"
python-oxmsg = "*"
python-pptx = {version = ">=1.0.1", optional = true, markers = "extra == \"ppt\" or extra == \"pptx\""}
rapidfuzz = "*"
requests = "*"
tqdm = "*"
typing-extensions = "*"
unstructured-client = "*"
wrapt = "*"
xlrd = {version = "*", optional = true, markers = "extra == \"xlsx\""}
[package.extras]
all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
csv = ["pandas"]
doc = ["python-docx (>=1.1.2)"]
docx = ["python-docx (>=1.1.2)"]
epub = ["pypandoc"]
huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
md = ["markdown"]
odt = ["pypandoc", "python-docx (>=1.1.2)"]
org = ["pypandoc"]
paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"]
pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
ppt = ["python-pptx (>=1.0.1)"]
pptx = ["python-pptx (>=1.0.1)"]
rst = ["pypandoc"]
rtf = ["pypandoc"]
tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
name = "unstructured-client"
version = "0.25.9"
@ -11578,7 +11682,7 @@ description = "A small Python utility to set file creation time on Windows"
optional = true
python-versions = ">=3.5"
groups = ["main"]
markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\""
markers = "extra == \"codegraph\" and sys_platform == \"win32\" and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\")"
files = [
{file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"},
{file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"},
@ -11962,7 +12066,7 @@ cffi = ["cffi (>=1.11)"]
[extras]
anthropic = ["anthropic"]
api = ["gunicorn", "uvicorn"]
api = ["gunicorn", "kuzu", "uvicorn"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
debug = ["debugpy"]
@ -11992,4 +12096,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"
content-hash = "5bd213f69d6dada714e632097121394992b46bd6d322afa024396847cb945f95"

View file

@ -64,9 +64,10 @@ dependencies = [
api = [
"uvicorn==0.34.0",
"gunicorn>=20.1.0,<21",
"kuzu==0.9.0",
]
weaviate = ["weaviate-client==4.9.6"]
qdrant = ["qdrant-client>=1.9.0,<2"]
qdrant = ["qdrant-client>=1.14.2,<2"]
neo4j = ["neo4j>=5.20.0,<6"]
postgres = [
"psycopg2>=2.9.10,<3",
@ -87,14 +88,14 @@ anthropic = ["anthropic>=0.26.1,<0.27"]
deepeval = ["deepeval>=2.0.1,<3"]
posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb==1.0.9"]
kuzu = ["kuzu==0.8.2"]
kuzu = ["kuzu==0.9.0"]
groq = ["groq==0.8.0"]
milvus = ["pymilvus>=2.5.0,<3"]
chromadb = [
"chromadb>=0.3.0,<0.7",
"pypika==0.48.8",
]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<0.17"]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<18"]
codegraph = [
"fastembed<=0.6.0 ; python_version < '3.13'",
"transformers>=4.46.3,<5",

7732
uv.lock generated

File diff suppressed because it is too large