From bcd418151aff2b623d17d1a824594b7b002f8ee6 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Fri, 11 Jul 2025 16:10:02 +0200 Subject: [PATCH] fix: Secure api v2 (#1060) ## Description ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/api/v1/add/routers/get_add_router.py | 8 ++++++-- .../v1/datasets/routers/get_datasets_router.py | 16 +++++++++++----- .../api/v1/delete/routers/get_delete_router.py | 6 ++++-- .../api/v1/users/routers/get_visualize_router.py | 16 ++++++++++++++-- .../tasks/ingestion/save_data_item_to_storage.py | 4 +++- 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index 3616b372c..a8d64006a 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -1,3 +1,4 @@ +import os from uuid import UUID from fastapi import Form, UploadFile, Depends @@ -31,8 +32,11 @@ def get_add_router() -> APIRouter: raise ValueError("Either datasetId or datasetName must be provided.") try: - # TODO: Add check if HTTP Requests are enabled before allowing requests and git clone - if isinstance(data, str) and data.startswith("http"): + if ( + isinstance(data, str) + and data.startswith("http") + and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true") + ): if "github" in data: # Perform git clone if the URL is from GitHub repo_name = data.split("/")[-1].replace(".git", "") diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 41c00e928..ff3b46899 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -10,6 +10,7 @@ from fastapi.responses import JSONResponse, FileResponse from cognee.api.DTO import InDTO, OutDTO from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.methods import create_dataset, get_datasets_by_name from cognee.shared.logging_utils import get_logger from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError @@ -177,7 +178,8 @@ def get_datasets_router() -> APIRouter: async def get_dataset_data(dataset_id: UUID, user: User = Depends(get_authenticated_user)): from cognee.modules.data.methods import get_dataset_data, get_dataset - dataset = await get_dataset(user.id, dataset_id) + # Verify user has permission to read dataset + dataset = await get_authorized_existing_datasets([dataset_id], "read", user) if dataset is None: return JSONResponse( @@ -185,7 +187,7 @@ def get_datasets_router() -> APIRouter: content=ErrorResponseDTO(f"Dataset ({str(dataset_id)}) not found."), ) - dataset_data = await get_dataset_data(dataset_id=dataset.id) + dataset_data = await get_dataset_data(dataset_id=dataset[0].id) if dataset_data is None: return [] @@ -200,6 +202,9 @@ def get_datasets_router() -> APIRouter: from cognee.api.v1.datasets.datasets import datasets as cognee_datasets try: + # Verify user has permission to read dataset + await get_authorized_existing_datasets(datasets, "read", user) + datasets_statuses = await cognee_datasets.get_status(datasets) return datasets_statuses @@ -211,16 +216,17 @@ def get_datasets_router() -> APIRouter: dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user) ): from cognee.modules.data.methods import get_data - from cognee.modules.data.methods import get_dataset, get_dataset_data + from cognee.modules.data.methods import get_dataset_data - dataset = await get_dataset(user.id, dataset_id) + # Verify user has permission to read dataset + dataset = await get_authorized_existing_datasets([dataset_id], "read", user) if dataset is None: return JSONResponse( status_code=404, content={"detail": f"Dataset ({dataset_id}) not found."} ) - dataset_data = await get_dataset_data(dataset.id) + dataset_data = await get_dataset_data(dataset[0].id) if dataset_data is None: raise DataNotFoundError(message=f"No data found in dataset ({dataset_id}).") diff --git a/cognee/api/v1/delete/routers/get_delete_router.py b/cognee/api/v1/delete/routers/get_delete_router.py index 3684caefa..1482ba75c 100644 --- a/cognee/api/v1/delete/routers/get_delete_router.py +++ b/cognee/api/v1/delete/routers/get_delete_router.py @@ -1,3 +1,4 @@ +import os from fastapi import Form, UploadFile, Depends from fastapi.responses import JSONResponse from fastapi import APIRouter @@ -37,8 +38,9 @@ def get_delete_router() -> APIRouter: # Handle each file in the list results = [] for file in data: - # TODO: Add check if HTTP Requests are enabled before allowing requests and git clone - if file.filename.startswith("http"): + if file.filename.startswith("http") and ( + os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true" + ): if "github" in file.filename: # For GitHub repos, we need to get the content hash of each file repo_name = file.filename.split("/")[-1].replace(".git", "") diff --git a/cognee/api/v1/users/routers/get_visualize_router.py b/cognee/api/v1/users/routers/get_visualize_router.py index 0710f333f..5faa005da 100644 --- a/cognee/api/v1/users/routers/get_visualize_router.py +++ b/cognee/api/v1/users/routers/get_visualize_router.py @@ -1,6 +1,12 @@ -from fastapi import APIRouter +from fastapi import APIRouter, Depends from fastapi.responses import HTMLResponse, JSONResponse +from uuid import UUID from cognee.shared.logging_utils import get_logger +from cognee.modules.users.methods import get_authenticated_user +from cognee.modules.data.methods import get_authorized_existing_datasets +from cognee.modules.users.models import User + +from cognee.context_global_variables import set_database_global_context_variables logger = get_logger() @@ -9,11 +15,17 @@ def get_visualize_router() -> APIRouter: router = APIRouter() @router.get("", response_model=None) - async def visualize(): + async def visualize(dataset_id: UUID, user: User = Depends(get_authenticated_user)): """This endpoint is responsible for adding data to the graph.""" from cognee.api.v1.visualize import visualize_graph try: + # Verify user has permission to read dataset + dataset = await get_authorized_existing_datasets([dataset_id], "read", user) + + # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True + await set_database_global_context_variables(dataset[0].id, dataset[0].owner_id) + html_visualization = await visualize_graph() return HTMLResponse(html_visualization) diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py index fefd8728b..6585535dd 100644 --- a/cognee/tasks/ingestion/save_data_item_to_storage.py +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -1,3 +1,4 @@ +import os from typing import Union, BinaryIO, Any from cognee.modules.ingestion.exceptions import IngestionError @@ -20,7 +21,8 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any], datase file_path = data_item # data is a file path elif data_item.startswith("file://") or data_item.startswith("/"): - # TODO: Add check if ACCEPT_LOCAL_FILE_PATH is enabled, if it's not raise an error + if os.getenv("ACCEPT_LOCAL_FILE_PATH", "true").lower() == "false": + raise IngestionError(message="Local files are not accepted.") file_path = data_item.replace("file://", "") # data is text else: