From a2d16c99a188b227aa2d69afc2079f5e54095bcd Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Mon, 14 Jul 2025 15:18:57 +0200
Subject: [PATCH] fix: Add docstrings to routers (#1081)

## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic
---
 cognee/api/v1/add/routers/get_add_router.py   |  37 +++-
 .../v1/cognify/routers/get_cognify_router.py  |  66 +++++-
 .../datasets/routers/get_datasets_router.py   | 195 ++++++++++++++++--
 .../routers/get_permissions_router.py         |  96 +++++++++
 .../v1/search/routers/get_search_router.py    |  45 +++-
 .../settings/routers/get_settings_router.py   |  38 ++++
 .../v1/users/routers/get_visualize_router.py  |  21 +-
 7 files changed, 473 insertions(+), 25 deletions(-)

diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py
index a8d64006a..0d0e64b4f 100644
--- a/cognee/api/v1/add/routers/get_add_router.py
+++ b/cognee/api/v1/add/routers/get_add_router.py
@@ -25,7 +25,42 @@ def get_add_router() -> APIRouter:
         datasetId: Optional[UUID] = Form(default=None),
         user: User = Depends(get_authenticated_user),
     ):
-        """This endpoint is responsible for adding data to the graph."""
+        """
+        Add data to a dataset for processing and knowledge graph construction.
+
+        This endpoint accepts various types of data (files, URLs, GitHub repositories)
+        and adds them to a specified dataset for processing. The data is ingested,
+        analyzed, and integrated into the knowledge graph. Either datasetName or
+        datasetId must be provided to specify the target dataset.
+
+        Args:
+            data (List[UploadFile]): List of files to upload. Can also include:
+                - HTTP URLs (if ALLOW_HTTP_REQUESTS is enabled)
+                - GitHub repository URLs (will be cloned and processed)
+                - Regular file uploads
+            datasetName (Optional[str]): Name of the dataset to add data to
+            datasetId (Optional[UUID]): UUID of the dataset to add data to
+            user: The authenticated user adding the data
+
+        Returns:
+            dict: Information about the add operation containing:
+                - Status of the operation
+                - Details about the processed data
+                - Any relevant metadata from the ingestion process
+
+        Raises:
+            ValueError: If neither datasetId nor datasetName is provided
+            HTTPException: If there's an error during the add operation
+            PermissionDeniedError: If the user doesn't have permission to add to the dataset
+
+        Note:
+            - To add data to a dataset not owned by the user but for which the user has write permission,
+              the datasetId must be used (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+            - GitHub repositories are cloned and all files are processed
+            - HTTP URLs are fetched and their content is processed
+            - Regular files are uploaded and processed directly
+            - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
+        """
         from cognee.api.v1.add import add as cognee_add

         if not datasetId and not datasetName:
diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index c4c3d9525..42b49d7d2 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@@ -41,7 +41,71 @@ def get_cognify_router() -> APIRouter:
     @router.post("", response_model=dict)
     async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
-        """This endpoint is responsible for the cognitive processing of the content."""
+        """
+        Transform datasets into structured knowledge graphs through cognitive processing.
+
+        This endpoint is the core of Cognee's intelligence layer, responsible for converting
+        raw text, documents, and data added through the add endpoint into semantic knowledge graphs.
+        It performs deep analysis to extract entities, relationships, and insights from ingested content.
+
+        The processing pipeline includes:
+        1. Document classification and permission validation
+        2. Text chunking and semantic segmentation
+        3. Entity extraction using LLM-powered analysis
+        4. Relationship detection and graph construction
+        5. Vector embeddings generation for semantic search
+        6. Content summarization and indexing
+
+        Args:
+            payload (CognifyPayloadDTO): Request payload containing processing parameters:
+                - datasets (Optional[List[str]]): List of dataset names to process.
+                  Dataset names are resolved to datasets owned by the authenticated user.
+                - dataset_ids (Optional[List[UUID]]): List of dataset UUIDs to process.
+                  UUIDs allow processing of datasets not owned by the user (if permitted).
+                - graph_model (Optional[BaseModel]): Custom Pydantic model defining the
+                  knowledge graph schema. Defaults to KnowledgeGraph for general-purpose
+                  processing. Custom models enable domain-specific entity extraction.
+                - run_in_background (Optional[bool]): Whether to execute processing
+                  asynchronously. Defaults to False (blocking).
+
+            user (User): Authenticated user context injected via dependency injection.
+                Used for permission validation and data access control.
+
+        Returns:
+            dict: Processing results containing:
+                - For blocking execution: Complete pipeline run information with
+                  entity counts, processing duration, and success/failure status
+                - For background execution: Pipeline run metadata including
+                  pipeline_run_id for status monitoring via WebSocket subscription
+
+        Raises:
+            HTTPException 400: Bad Request
+                - When neither datasets nor dataset_ids are provided
+                - When specified datasets don't exist or are inaccessible
+
+            HTTPException 409: Conflict
+                - When processing fails due to system errors
+                - When LLM API keys are missing or invalid
+                - When database connections fail
+                - When content cannot be processed (corrupted files, unsupported formats)
+
+        Example Usage:
+            ```python
+            # Process specific datasets synchronously
+            POST /api/v1/cognify
+            {
+                "datasets": ["research_papers", "documentation"],
+                "run_in_background": false
+            }
+            ```
+        Notes:
+            To cognify data in a dataset not owned by the user but for which the current user has write permission,
+            the dataset_ids field must be used (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+
+        Next Steps:
+            After successful processing, use the search endpoints to query the
+            generated knowledge graph for insights, relationships, and semantic search.
+        """
         if not payload.datasets and not payload.dataset_ids:
             return JSONResponse(
                 status_code=400, content={"error": "No datasets or dataset_ids provided"}
diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py
index ff3b46899..4ed36f9e1 100644
--- a/cognee/api/v1/datasets/routers/get_datasets_router.py
+++ b/cognee/api/v1/datasets/routers/get_datasets_router.py
@@ -74,6 +74,27 @@ def get_datasets_router() -> APIRouter:
     @router.get("", response_model=list[DatasetDTO])
     async def get_datasets(user: User = Depends(get_authenticated_user)):
+        """
+        Get all datasets accessible to the authenticated user.
+ + This endpoint retrieves all datasets that the authenticated user has + read permissions for. The datasets are returned with their metadata + including ID, name, creation time, and owner information. + + Args: + user: The authenticated user requesting the datasets + + Returns: + List[DatasetDTO]: A list of dataset objects containing: + - id: Unique dataset identifier + - name: Dataset name + - created_at: When the dataset was created + - updated_at: When the dataset was last updated + - owner_id: ID of the dataset owner + + Raises: + HTTPException: If there's an error retrieving the datasets + """ try: datasets = await get_all_user_permission_datasets(user, "read") @@ -89,6 +110,30 @@ def get_datasets_router() -> APIRouter: async def create_new_dataset( dataset_data: DatasetCreationPayload, user: User = Depends(get_authenticated_user) ): + """ + Create a new dataset or return existing dataset with the same name. + + This endpoint creates a new dataset with the specified name. If a dataset + with the same name already exists for the user, it returns the existing + dataset instead of creating a duplicate. The user is automatically granted + all permissions (read, write, share, delete) on the created dataset. + + Args: + dataset_data (DatasetCreationPayload): Dataset creation parameters containing: + - name: The name for the new dataset + user: The authenticated user creating the dataset + + Returns: + DatasetDTO: The created or existing dataset object containing: + - id: Unique dataset identifier + - name: Dataset name + - created_at: When the dataset was created + - updated_at: When the dataset was last updated + - owner_id: ID of the dataset owner + + Raises: + HTTPException: If there's an error creating the dataset + """ try: datasets = await get_datasets_by_name([dataset_data.name], user.id) @@ -118,6 +163,23 @@ def get_datasets_router() -> APIRouter: "/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}} ) async def delete_dataset(dataset_id: UUID, user: User = Depends(get_authenticated_user)): + """ + Delete a dataset by its ID. + + This endpoint permanently deletes a dataset and all its associated data. + The user must have delete permissions on the dataset to perform this operation. + + Args: + dataset_id (UUID): The unique identifier of the dataset to delete + user: The authenticated user requesting the deletion + + Returns: + None: No content returned on successful deletion + + Raises: + DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access + HTTPException: If there's an error during deletion + """ from cognee.modules.data.methods import get_dataset, delete_dataset dataset = await get_dataset(user.id, dataset_id) @@ -135,6 +197,26 @@ def get_datasets_router() -> APIRouter: async def delete_data( dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user) ): + """ + Delete a specific data item from a dataset. + + This endpoint removes a specific data item from a dataset while keeping + the dataset itself intact. The user must have delete permissions on the + dataset to perform this operation. 
+ + Args: + dataset_id (UUID): The unique identifier of the dataset containing the data + data_id (UUID): The unique identifier of the data item to delete + user: The authenticated user requesting the deletion + + Returns: + None: No content returned on successful deletion + + Raises: + DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access + DataNotFoundError: If the data item doesn't exist in the dataset + HTTPException: If there's an error during deletion + """ from cognee.modules.data.methods import get_data, delete_data from cognee.modules.data.methods import get_dataset @@ -153,22 +235,36 @@ def get_datasets_router() -> APIRouter: @router.get("/{dataset_id}/graph", response_model=GraphDTO) async def get_dataset_graph(dataset_id: UUID, user: User = Depends(get_authenticated_user)): - try: - from cognee.modules.data.methods import get_dataset + """ + Get the knowledge graph visualization for a dataset. - dataset = await get_dataset(user.id, dataset_id) + This endpoint retrieves the knowledge graph data for a specific dataset, + including nodes and edges that represent the relationships between entities + in the dataset. The graph data is formatted for visualization purposes. - formatted_graph_data = await get_formatted_graph_data(dataset.id, user.id) + Args: + dataset_id (UUID): The unique identifier of the dataset + user: The authenticated user requesting the graph data - return JSONResponse( - status_code=200, - content=formatted_graph_data, - ) - except Exception: - return JSONResponse( - status_code=409, - content="Error retrieving dataset graph data.", - ) + Returns: + GraphDTO: The graph data containing: + - nodes: List of graph nodes with id, label, and properties + - edges: List of graph edges with source, target, and label + + Raises: + DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access + HTTPException: If there's an error retrieving the graph data + """ + from cognee.modules.data.methods import get_dataset + + dataset = await get_dataset(user.id, dataset_id) + + if dataset is None: + raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.") + + graph_data = await get_formatted_graph_data(dataset) + + return graph_data @router.get( "/{dataset_id}/data", @@ -176,6 +272,31 @@ def get_datasets_router() -> APIRouter: responses={404: {"model": ErrorResponseDTO}}, ) async def get_dataset_data(dataset_id: UUID, user: User = Depends(get_authenticated_user)): + """ + Get all data items in a dataset. + + This endpoint retrieves all data items (documents, files, etc.) that belong + to a specific dataset. Each data item includes metadata such as name, type, + creation time, and storage location. 
+ + Args: + dataset_id (UUID): The unique identifier of the dataset + user: The authenticated user requesting the data + + Returns: + List[DataDTO]: A list of data objects containing: + - id: Unique data item identifier + - name: Data item name + - created_at: When the data was added + - updated_at: When the data was last updated + - extension: File extension + - mime_type: MIME type of the data + - raw_data_location: Storage location of the raw data + + Raises: + DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access + HTTPException: If there's an error retrieving the data + """ from cognee.modules.data.methods import get_dataset_data, get_dataset # Verify user has permission to read dataset @@ -199,22 +320,54 @@ def get_datasets_router() -> APIRouter: datasets: Annotated[List[UUID], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user), ): - from cognee.api.v1.datasets.datasets import datasets as cognee_datasets + """ + Get the processing status of datasets. - try: - # Verify user has permission to read dataset - await get_authorized_existing_datasets(datasets, "read", user) + This endpoint retrieves the current processing status of one or more datasets, + indicating whether they are being processed, have completed processing, or + encountered errors during pipeline execution. - datasets_statuses = await cognee_datasets.get_status(datasets) + Args: + datasets: List of dataset UUIDs to check status for (query parameter "dataset") + user: The authenticated user requesting the status - return datasets_statuses - except Exception as error: - return JSONResponse(status_code=409, content={"error": str(error)}) + Returns: + Dict[str, PipelineRunStatus]: A dictionary mapping dataset IDs to their + processing status (e.g., "pending", "running", "completed", "failed") + + Raises: + HTTPException: If there's an error retrieving the status information + """ + from cognee.modules.data.methods import get_dataset_status + + dataset_status = await get_dataset_status(datasets, user.id) + + return dataset_status @router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse) async def get_raw_data( dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user) ): + """ + Download the raw data file for a specific data item. + + This endpoint allows users to download the original, unprocessed data file + for a specific data item within a dataset. The file is returned as a direct + download with appropriate headers. 
+ + Args: + dataset_id (UUID): The unique identifier of the dataset containing the data + data_id (UUID): The unique identifier of the data item to download + user: The authenticated user requesting the download + + Returns: + FileResponse: The raw data file as a downloadable response + + Raises: + DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access + DataNotFoundError: If the data item doesn't exist in the dataset + HTTPException: If there's an error accessing the raw data file + """ from cognee.modules.data.methods import get_data from cognee.modules.data.methods import get_dataset_data diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 282efa2b5..9ec57e012 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -18,6 +18,26 @@ def get_permissions_router() -> APIRouter: principal_id: UUID, user: User = Depends(get_authenticated_user), ): + """ + Grant permission on datasets to a principal (user or role). + + This endpoint allows granting specific permissions on one or more datasets + to a principal (which can be a user or role). The authenticated user must + have appropriate permissions to grant access to the specified datasets. + + Args: + permission_name (str): The name of the permission to grant (e.g., "read", "write", "delete") + dataset_ids (List[UUID]): List of dataset UUIDs to grant permission on + principal_id (UUID): The UUID of the principal (user or role) to grant permission to + user: The authenticated user granting the permission + + Returns: + JSONResponse: Success message indicating permission was assigned + + Raises: + HTTPException: If there's an error granting the permission + PermissionDeniedError: If the user doesn't have permission to grant access + """ from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets await authorized_give_permission_on_datasets( @@ -33,6 +53,24 @@ def get_permissions_router() -> APIRouter: @permissions_router.post("/roles") async def create_role(role_name: str, user: User = Depends(get_authenticated_user)): + """ + Create a new role. + + This endpoint creates a new role with the specified name. Roles are used + to group permissions and can be assigned to users to manage access control + more efficiently. The authenticated user becomes the owner of the created role. + + Args: + role_name (str): The name of the role to create + user: The authenticated user creating the role + + Returns: + JSONResponse: Success message indicating the role was created + + Raises: + HTTPException: If there's an error creating the role + ValidationError: If the role name is invalid or already exists + """ from cognee.modules.users.roles.methods import create_role as create_role_method await create_role_method(role_name=role_name, owner_id=user.id) @@ -43,6 +81,26 @@ def get_permissions_router() -> APIRouter: async def add_user_to_role( user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user) ): + """ + Add a user to a role. + + This endpoint assigns a user to a specific role, granting them all the + permissions associated with that role. The authenticated user must be + the owner of the role or have appropriate administrative permissions. 
+ + Args: + user_id (UUID): The UUID of the user to add to the role + role_id (UUID): The UUID of the role to assign the user to + user: The authenticated user performing the role assignment + + Returns: + JSONResponse: Success message indicating the user was added to the role + + Raises: + HTTPException: If there's an error adding the user to the role + PermissionDeniedError: If the user doesn't have permission to assign roles + ValidationError: If the user or role doesn't exist + """ from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id) @@ -53,6 +111,26 @@ def get_permissions_router() -> APIRouter: async def add_user_to_tenant( user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user) ): + """ + Add a user to a tenant. + + This endpoint assigns a user to a specific tenant, allowing them to access + resources and data associated with that tenant. The authenticated user must + be the owner of the tenant or have appropriate administrative permissions. + + Args: + user_id (UUID): The UUID of the user to add to the tenant + tenant_id (UUID): The UUID of the tenant to assign the user to + user: The authenticated user performing the tenant assignment + + Returns: + JSONResponse: Success message indicating the user was added to the tenant + + Raises: + HTTPException: If there's an error adding the user to the tenant + PermissionDeniedError: If the user doesn't have permission to assign tenants + ValidationError: If the user or tenant doesn't exist + """ from cognee.modules.users.tenants.methods import add_user_to_tenant await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id) @@ -61,6 +139,24 @@ def get_permissions_router() -> APIRouter: @permissions_router.post("/tenants") async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)): + """ + Create a new tenant. + + This endpoint creates a new tenant with the specified name. Tenants are used + to organize users and resources in multi-tenant environments, providing + isolation and access control between different groups or organizations. + + Args: + tenant_name (str): The name of the tenant to create + user: The authenticated user creating the tenant + + Returns: + JSONResponse: Success message indicating the tenant was created + + Raises: + HTTPException: If there's an error creating the tenant + ValidationError: If the tenant name is invalid or already exists + """ from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method await create_tenant_method(tenant_name=tenant_name, user_id=user.id) diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py index 9baa1db03..e8f7e40b8 100644 --- a/cognee/api/v1/search/routers/get_search_router.py +++ b/cognee/api/v1/search/routers/get_search_router.py @@ -32,6 +32,22 @@ def get_search_router() -> APIRouter: @router.get("", response_model=list[SearchHistoryItem]) async def get_search_history(user: User = Depends(get_authenticated_user)): + """ + Get search history for the authenticated user. + + This endpoint retrieves the search history for the authenticated user, + returning a list of previously executed searches with their timestamps. 
+ + Returns: + List[SearchHistoryItem]: A list of search history items containing: + - id: Unique identifier for the search + - text: The search query text + - user: User who performed the search + - created_at: When the search was performed + + Raises: + HTTPException: If there's an error retrieving the search history + """ try: history = await get_history(user.id, limit=0) @@ -41,7 +57,34 @@ def get_search_router() -> APIRouter: @router.post("", response_model=list) async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)): - """This endpoint is responsible for searching for nodes in the graph.""" + """ + Search for nodes in the graph database. + + This endpoint performs semantic search across the knowledge graph to find + relevant nodes based on the provided query. It supports different search + types and can be scoped to specific datasets. + + Args: + payload (SearchPayloadDTO): Search parameters containing: + - search_type: Type of search to perform (SearchType) + - datasets: Optional list of dataset names to search within + - dataset_ids: Optional list of dataset UUIDs to search within + - query: The search query string + - top_k: Maximum number of results to return (default: 10) + user: The authenticated user performing the search + + Returns: + List: A list of search results containing relevant nodes from the graph + + Raises: + HTTPException: If there's an error during the search operation + PermissionDeniedError: If user doesn't have permission to search datasets + + Note: + - Datasets sent by name will only map to datasets owned by the request sender + - To search datasets not owned by the request sender, dataset UUID is needed + - If permission is denied, returns empty list instead of error + """ from cognee.api.v1.search import search as cognee_search try: diff --git a/cognee/api/v1/settings/routers/get_settings_router.py b/cognee/api/v1/settings/routers/get_settings_router.py index 4c81c07bb..6fd4c811e 100644 --- a/cognee/api/v1/settings/routers/get_settings_router.py +++ b/cognee/api/v1/settings/routers/get_settings_router.py @@ -48,6 +48,24 @@ def get_settings_router() -> APIRouter: @router.get("", response_model=SettingsDTO) async def get_settings(user: User = Depends(get_authenticated_user)): + """ + Get the current system settings. + + This endpoint retrieves the current configuration settings for the system, + including LLM (Large Language Model) configuration and vector database + configuration. These settings determine how the system processes and stores data. + + Args: + user: The authenticated user requesting the settings + + Returns: + SettingsDTO: The current system settings containing: + - llm: LLM configuration (provider, model, API key) + - vector_db: Vector database configuration (provider, URL, API key) + + Raises: + HTTPException: If there's an error retrieving the settings + """ from cognee.modules.settings import get_settings as get_cognee_settings return get_cognee_settings() @@ -56,6 +74,26 @@ def get_settings_router() -> APIRouter: async def save_settings( new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user) ): + """ + Save or update system settings. + + This endpoint allows updating the system configuration settings. You can + update either the LLM configuration, vector database configuration, or both. + Only provided settings will be updated; others remain unchanged. 
+ + Args: + new_settings (SettingsPayloadDTO): The settings to update containing: + - llm: Optional LLM configuration (provider, model, API key) + - vector_db: Optional vector database configuration (provider, URL, API key) + user: The authenticated user making the changes + + Returns: + None: No content returned on successful save + + Raises: + HTTPException: If there's an error saving the settings + ValidationError: If the provided settings are invalid + """ from cognee.modules.settings import save_llm_config, save_vector_db_config if new_settings.llm is not None: diff --git a/cognee/api/v1/users/routers/get_visualize_router.py b/cognee/api/v1/users/routers/get_visualize_router.py index 5faa005da..f2ad2966e 100644 --- a/cognee/api/v1/users/routers/get_visualize_router.py +++ b/cognee/api/v1/users/routers/get_visualize_router.py @@ -16,7 +16,26 @@ def get_visualize_router() -> APIRouter: @router.get("", response_model=None) async def visualize(dataset_id: UUID, user: User = Depends(get_authenticated_user)): - """This endpoint is responsible for adding data to the graph.""" + """ + Generate an HTML visualization of the dataset's knowledge graph. + + This endpoint creates an interactive HTML visualization of the knowledge graph + for a specific dataset. The visualization displays nodes and edges representing + entities and their relationships, allowing users to explore the graph structure + visually. The user must have read permissions on the dataset. + + Args: + dataset_id (UUID): The unique identifier of the dataset to visualize + user: The authenticated user requesting the visualization + + Returns: + HTMLResponse: An HTML page containing the interactive graph visualization + + Raises: + HTTPException: If there's an error generating the visualization + PermissionDeniedError: If the user doesn't have permission to read the dataset + DatasetNotFoundError: If the dataset doesn't exist + """ from cognee.api.v1.visualize import visualize_graph try: