From 801efeb1cb481b34c3bc83f05050f18c2fef84a5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 5 Nov 2024 21:29:56 +0100 Subject: [PATCH] fix: Resolve security concerns regarding os calls Resolved security concerns in endpoints regarding os Fix #COG-334-structure-routing --- cognee/api/v1/add/routers/get_add_router.py | 12 +++-- .../v1/cognify/routers/get_cognify_router.py | 8 +-- .../datasets/routers/get_datasets_router.py | 52 +++++++++--------- .../v1/search/routers/get_search_router.py | 11 ++-- .../settings/routers/get_settings_router.py | 53 +++++++++---------- 5 files changed, 72 insertions(+), 64 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index bb5f00ce1..1f45d0c95 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -3,12 +3,15 @@ from fastapi.responses import JSONResponse from fastapi import APIRouter from typing import List import aiohttp +import subprocess +import logging import os - from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user -def get_add_router(): +logger = logging.getLogger(__name__) + +def get_add_router() -> APIRouter: router = APIRouter() @router.post("/", response_model=None) @@ -24,7 +27,7 @@ def get_add_router(): if "github" in data: # Perform git clone if the URL is from GitHub repo_name = data.split("/")[-1].replace(".git", "") - os.system(f"git clone {data} .data/{repo_name}") + subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True) await cognee_add( "data://.data/", f"{repo_name}", @@ -35,7 +38,8 @@ def get_add_router(): async with session.get(data) as resp: if resp.status == 200: file_data = await resp.read() - with open(f".data/{data.split('/')[-1]}", "wb") as f: + filename = os.path.basename(data) + with open(f".data/{filename}", "wb") as f: f.write(file_data) await cognee_add( "data://.data/", diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index 987c15d8a..9616fa71c 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -6,11 +6,11 @@ from fastapi.responses import JSONResponse from cognee.modules.users.methods import get_authenticated_user from fastapi import Depends -def get_cognify_router(): - router = APIRouter() +class CognifyPayloadDTO(BaseModel): + datasets: List[str] - class CognifyPayloadDTO(BaseModel): - datasets: List[str] +def get_cognify_router() -> APIRouter: + router = APIRouter() @router.post("/", response_model=None) async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)): diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 9e9942818..698e6bb9e 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -13,20 +13,30 @@ from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user from cognee.modules.pipelines.models import PipelineRunStatus -def get_datasets_router(): - logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) + +class ErrorResponseDTO(BaseModel): + message: str + +class DatasetDTO(OutDTO): + id: UUID + name: str + created_at: datetime + updated_at: Optional[datetime] + owner_id: UUID + +class DataDTO(OutDTO): + id: UUID + name: str + created_at: datetime + updated_at: Optional[datetime] + extension: str + mime_type: str + raw_data_location: str + +def get_datasets_router() -> APIRouter: router = APIRouter() - class ErrorResponseDTO(BaseModel): - message: str - - class DatasetDTO(OutDTO): - id: UUID - name: str - created_at: datetime - updated_at: Optional[datetime] - owner_id: UUID - @router.get("/", response_model=list[DatasetDTO]) async def get_datasets(user: User = Depends(get_authenticated_user)): try: @@ -96,15 +106,6 @@ def get_datasets_router(): content="Graphistry credentials are not set. Please set them in your .env file.", ) - class DataDTO(OutDTO): - id: UUID - name: str - created_at: datetime - updated_at: Optional[datetime] - extension: str - mime_type: str - raw_data_location: str - @router.get("/{dataset_id}/data", response_model=list[DataDTO], responses={404: {"model": ErrorResponseDTO}}) async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)): @@ -157,11 +158,12 @@ def get_datasets_router(): dataset_data = await get_dataset_data(dataset.id) if dataset_data is None: - raise HTTPException(status_code=404, detail=f"Dataset ({dataset_id}) not found.") + raise HTTPException(status_code=404, detail=f"No data found in dataset ({dataset_id}).") - data = [data for data in dataset_data if str(data.id) == data_id][0] + matching_data = [data for data in dataset_data if str(data.id) == data_id] - if data is None: + # Check if matching_data contains an element + if len(matching_data) == 0: return JSONResponse( status_code=404, content={ @@ -169,6 +171,8 @@ def get_datasets_router(): } ) + data = matching_data[0] + return data.raw_data_location return router \ No newline at end of file diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py index ba5fae96f..5df49635f 100644 --- a/cognee/api/v1/search/routers/get_search_router.py +++ b/cognee/api/v1/search/routers/get_search_router.py @@ -5,12 +5,13 @@ from fastapi import Depends, APIRouter from cognee.api.DTO import InDTO from cognee.modules.users.methods import get_authenticated_user -def get_search_router(): - router = APIRouter() - class SearchPayloadDTO(InDTO): - search_type: SearchType - query: str +class SearchPayloadDTO(InDTO): + search_type: SearchType + query: str + +def get_search_router() -> APIRouter: + router = APIRouter() @router.post("/", response_model = list) async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)): diff --git a/cognee/api/v1/settings/routers/get_settings_router.py b/cognee/api/v1/settings/routers/get_settings_router.py index 7f3b09516..31692382b 100644 --- a/cognee/api/v1/settings/routers/get_settings_router.py +++ b/cognee/api/v1/settings/routers/get_settings_router.py @@ -4,41 +4,40 @@ from typing import Union, Optional, Literal from cognee.modules.users.methods import get_authenticated_user from fastapi import Depends from cognee.modules.users.models import User +from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig -def get_settings_router(): +class LLMConfigOutputDTO(OutDTO, LLMConfig): + pass + +class VectorDBConfigOutputDTO(OutDTO, VectorDBConfig): + pass + +class SettingsDTO(OutDTO): + llm: LLMConfigOutputDTO + vector_db: VectorDBConfigOutputDTO + +class LLMConfigInputDTO(InDTO): + provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]] + model: str + api_key: str + +class VectorDBConfigInputDTO(InDTO): + provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]] + url: str + api_key: str + +class SettingsPayloadDTO(InDTO): + llm: Optional[LLMConfigInputDTO] = None + vector_db: Optional[VectorDBConfigInputDTO] = None + +def get_settings_router() -> APIRouter: router = APIRouter() - from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig - - class LLMConfigDTO(OutDTO, LLMConfig): - pass - - class VectorDBConfigDTO(OutDTO, VectorDBConfig): - pass - - class SettingsDTO(OutDTO): - llm: LLMConfigDTO - vector_db: VectorDBConfigDTO - @router.get("/", response_model=SettingsDTO) async def get_settings(user: User = Depends(get_authenticated_user)): from cognee.modules.settings import get_settings as get_cognee_settings return get_cognee_settings() - class LLMConfigDTO(InDTO): - provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]] - model: str - api_key: str - - class VectorDBConfigDTO(InDTO): - provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]] - url: str - api_key: str - - class SettingsPayloadDTO(InDTO): - llm: Optional[LLMConfigDTO] = None - vector_db: Optional[VectorDBConfigDTO] = None - @router.post("/", response_model=None) async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)): from cognee.modules.settings import save_llm_config, save_vector_db_config