fix: Add docstrings to routers (#1081)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <igorilic03@gmail.com>
This commit is contained in:
Vasilije 2025-07-14 15:18:57 +02:00 committed by GitHub
parent 9fd300112d
commit a2d16c99a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 473 additions and 25 deletions

View file

@ -25,7 +25,42 @@ def get_add_router() -> APIRouter:
datasetId: Optional[UUID] = Form(default=None),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for adding data to the graph."""
"""
Add data to a dataset for processing and knowledge graph construction.
This endpoint accepts various types of data (files, URLs, GitHub repositories)
and adds them to a specified dataset for processing. The data is ingested,
analyzed, and integrated into the knowledge graph. Either datasetName or
datasetId must be provided to specify the target dataset.
Args:
data (List[UploadFile]): List of files to upload. Can also include:
- HTTP URLs (if ALLOW_HTTP_REQUESTS is enabled)
- GitHub repository URLs (will be cloned and processed)
- Regular file uploads
datasetName (Optional[str]): Name of the dataset to add data to
datasetId (Optional[UUID]): UUID of the dataset to add data to
user: The authenticated user adding the data
Returns:
dict: Information about the add operation containing:
- Status of the operation
- Details about the processed data
- Any relevant metadata from the ingestion process
Raises:
ValueError: If neither datasetId nor datasetName is provided
HTTPException: If there's an error during the add operation
PermissionDeniedError: If the user doesn't have permission to add to the dataset
Note:
- To add data to a dataset not owned by the user, for which the user has write permission,
the dataset_id must be used (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
- GitHub repositories are cloned and all files are processed
- HTTP URLs are fetched and their content is processed
- Regular files are uploaded and processed directly
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
"""
from cognee.api.v1.add import add as cognee_add
if not datasetId and not datasetName:

View file

@ -41,7 +41,71 @@ def get_cognify_router() -> APIRouter:
@router.post("", response_model=dict)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for the cognitive processing of the content."""
"""
Transform datasets into structured knowledge graphs through cognitive processing.
This endpoint is the core of Cognee's intelligence layer, responsible for converting
raw text, documents, and data added through the add endpoint into semantic knowledge graphs.
It performs deep analysis to extract entities, relationships, and insights from ingested content.
The processing pipeline includes:
1. Document classification and permission validation
2. Text chunking and semantic segmentation
3. Entity extraction using LLM-powered analysis
4. Relationship detection and graph construction
5. Vector embeddings generation for semantic search
6. Content summarization and indexing
Args:
payload (CognifyPayloadDTO): Request payload containing processing parameters:
- datasets (Optional[List[str]]): List of dataset names to process.
Dataset names are resolved to datasets owned by the authenticated user.
- dataset_ids (Optional[List[UUID]]): List of dataset UUIDs to process.
UUIDs allow processing of datasets not owned by the user (if permitted).
- graph_model (Optional[BaseModel]): Custom Pydantic model defining the
knowledge graph schema. Defaults to KnowledgeGraph for general-purpose
processing. Custom models enable domain-specific entity extraction.
- run_in_background (Optional[bool]): Whether to execute processing
asynchronously. Defaults to False (blocking).
user (User): Authenticated user context injected via dependency injection.
Used for permission validation and data access control.
Returns:
dict: Processing results containing:
- For blocking execution: Complete pipeline run information with
entity counts, processing duration, and success/failure status
- For background execution: Pipeline run metadata including
pipeline_run_id for status monitoring via WebSocket subscription
Raises:
HTTPException 400: Bad Request
- When neither datasets nor dataset_ids are provided
- When specified datasets don't exist or are inaccessible
HTTPException 409: Conflict
- When processing fails due to system errors
- When LLM API keys are missing or invalid
- When database connections fail
- When content cannot be processed (corrupted files, unsupported formats)
Example Usage:
```python
# Process specific datasets synchronously
POST /api/v1/cognify
{
"datasets": ["research_papers", "documentation"],
"run_in_background": false
}
```
Notes:
To cognify data in a dataset not owned by the user, for which the current user has write permission,
the dataset_id must be used (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
Next Steps:
After successful processing, use the search endpoints to query the
generated knowledge graph for insights, relationships, and semantic search.
"""
if not payload.datasets and not payload.dataset_ids:
return JSONResponse(
status_code=400, content={"error": "No datasets or dataset_ids provided"}

View file

@ -74,6 +74,27 @@ def get_datasets_router() -> APIRouter:
@router.get("", response_model=list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
"""
Get all datasets accessible to the authenticated user.
This endpoint retrieves all datasets that the authenticated user has
read permissions for. The datasets are returned with their metadata
including ID, name, creation time, and owner information.
Args:
user: The authenticated user requesting the datasets
Returns:
List[DatasetDTO]: A list of dataset objects containing:
- id: Unique dataset identifier
- name: Dataset name
- created_at: When the dataset was created
- updated_at: When the dataset was last updated
- owner_id: ID of the dataset owner
Raises:
HTTPException: If there's an error retrieving the datasets
"""
try:
datasets = await get_all_user_permission_datasets(user, "read")
@ -89,6 +110,30 @@ def get_datasets_router() -> APIRouter:
async def create_new_dataset(
dataset_data: DatasetCreationPayload, user: User = Depends(get_authenticated_user)
):
"""
Create a new dataset or return existing dataset with the same name.
This endpoint creates a new dataset with the specified name. If a dataset
with the same name already exists for the user, it returns the existing
dataset instead of creating a duplicate. The user is automatically granted
all permissions (read, write, share, delete) on the created dataset.
Args:
dataset_data (DatasetCreationPayload): Dataset creation parameters containing:
- name: The name for the new dataset
user: The authenticated user creating the dataset
Returns:
DatasetDTO: The created or existing dataset object containing:
- id: Unique dataset identifier
- name: Dataset name
- created_at: When the dataset was created
- updated_at: When the dataset was last updated
- owner_id: ID of the dataset owner
Raises:
HTTPException: If there's an error creating the dataset
"""
try:
datasets = await get_datasets_by_name([dataset_data.name], user.id)
@ -118,6 +163,23 @@ def get_datasets_router() -> APIRouter:
"/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}}
)
async def delete_dataset(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""
Delete a dataset by its ID.
This endpoint permanently deletes a dataset and all its associated data.
The user must have delete permissions on the dataset to perform this operation.
Args:
dataset_id (UUID): The unique identifier of the dataset to delete
user: The authenticated user requesting the deletion
Returns:
None: No content returned on successful deletion
Raises:
DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access
HTTPException: If there's an error during deletion
"""
from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = await get_dataset(user.id, dataset_id)
@ -135,6 +197,26 @@ def get_datasets_router() -> APIRouter:
async def delete_data(
dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Delete a specific data item from a dataset.
This endpoint removes a specific data item from a dataset while keeping
the dataset itself intact. The user must have delete permissions on the
dataset to perform this operation.
Args:
dataset_id (UUID): The unique identifier of the dataset containing the data
data_id (UUID): The unique identifier of the data item to delete
user: The authenticated user requesting the deletion
Returns:
None: No content returned on successful deletion
Raises:
DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access
DataNotFoundError: If the data item doesn't exist in the dataset
HTTPException: If there's an error during deletion
"""
from cognee.modules.data.methods import get_data, delete_data
from cognee.modules.data.methods import get_dataset
@ -153,22 +235,36 @@ def get_datasets_router() -> APIRouter:
@router.get("/{dataset_id}/graph", response_model=GraphDTO)
async def get_dataset_graph(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
try:
from cognee.modules.data.methods import get_dataset
"""
Get the knowledge graph visualization for a dataset.
dataset = await get_dataset(user.id, dataset_id)
This endpoint retrieves the knowledge graph data for a specific dataset,
including nodes and edges that represent the relationships between entities
in the dataset. The graph data is formatted for visualization purposes.
formatted_graph_data = await get_formatted_graph_data(dataset.id, user.id)
Args:
dataset_id (UUID): The unique identifier of the dataset
user: The authenticated user requesting the graph data
return JSONResponse(
status_code=200,
content=formatted_graph_data,
)
except Exception:
return JSONResponse(
status_code=409,
content="Error retrieving dataset graph data.",
)
Returns:
GraphDTO: The graph data containing:
- nodes: List of graph nodes with id, label, and properties
- edges: List of graph edges with source, target, and label
Raises:
DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access
HTTPException: If there's an error retrieving the graph data
"""
from cognee.modules.data.methods import get_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
graph_data = await get_formatted_graph_data(dataset)
return graph_data
@router.get(
"/{dataset_id}/data",
@ -176,6 +272,31 @@ def get_datasets_router() -> APIRouter:
responses={404: {"model": ErrorResponseDTO}},
)
async def get_dataset_data(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""
Get all data items in a dataset.
This endpoint retrieves all data items (documents, files, etc.) that belong
to a specific dataset. Each data item includes metadata such as name, type,
creation time, and storage location.
Args:
dataset_id (UUID): The unique identifier of the dataset
user: The authenticated user requesting the data
Returns:
List[DataDTO]: A list of data objects containing:
- id: Unique data item identifier
- name: Data item name
- created_at: When the data was added
- updated_at: When the data was last updated
- extension: File extension
- mime_type: MIME type of the data
- raw_data_location: Storage location of the raw data
Raises:
DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access
HTTPException: If there's an error retrieving the data
"""
from cognee.modules.data.methods import get_dataset_data, get_dataset
# Verify user has permission to read dataset
@ -199,22 +320,54 @@ def get_datasets_router() -> APIRouter:
datasets: Annotated[List[UUID], Query(alias="dataset")] = None,
user: User = Depends(get_authenticated_user),
):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
"""
Get the processing status of datasets.
try:
# Verify user has permission to read dataset
await get_authorized_existing_datasets(datasets, "read", user)
This endpoint retrieves the current processing status of one or more datasets,
indicating whether they are being processed, have completed processing, or
encountered errors during pipeline execution.
datasets_statuses = await cognee_datasets.get_status(datasets)
Args:
datasets: List of dataset UUIDs to check status for (query parameter "dataset")
user: The authenticated user requesting the status
return datasets_statuses
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
Returns:
Dict[str, PipelineRunStatus]: A dictionary mapping dataset IDs to their
processing status (e.g., "pending", "running", "completed", "failed")
Raises:
HTTPException: If there's an error retrieving the status information
"""
from cognee.modules.data.methods import get_dataset_status
dataset_status = await get_dataset_status(datasets, user.id)
return dataset_status
@router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
async def get_raw_data(
dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Download the raw data file for a specific data item.
This endpoint allows users to download the original, unprocessed data file
for a specific data item within a dataset. The file is returned as a direct
download with appropriate headers.
Args:
dataset_id (UUID): The unique identifier of the dataset containing the data
data_id (UUID): The unique identifier of the data item to download
user: The authenticated user requesting the download
Returns:
FileResponse: The raw data file as a downloadable response
Raises:
DatasetNotFoundError: If the dataset doesn't exist or user doesn't have access
DataNotFoundError: If the data item doesn't exist in the dataset
HTTPException: If there's an error accessing the raw data file
"""
from cognee.modules.data.methods import get_data
from cognee.modules.data.methods import get_dataset_data

View file

@ -18,6 +18,26 @@ def get_permissions_router() -> APIRouter:
principal_id: UUID,
user: User = Depends(get_authenticated_user),
):
"""
Grant permission on datasets to a principal (user or role).
This endpoint allows granting specific permissions on one or more datasets
to a principal (which can be a user or role). The authenticated user must
have appropriate permissions to grant access to the specified datasets.
Args:
permission_name (str): The name of the permission to grant (e.g., "read", "write", "delete")
dataset_ids (List[UUID]): List of dataset UUIDs to grant permission on
principal_id (UUID): The UUID of the principal (user or role) to grant permission to
user: The authenticated user granting the permission
Returns:
JSONResponse: Success message indicating permission was assigned
Raises:
HTTPException: If there's an error granting the permission
PermissionDeniedError: If the user doesn't have permission to grant access
"""
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
await authorized_give_permission_on_datasets(
@ -33,6 +53,24 @@ def get_permissions_router() -> APIRouter:
@permissions_router.post("/roles")
async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
"""
Create a new role.
This endpoint creates a new role with the specified name. Roles are used
to group permissions and can be assigned to users to manage access control
more efficiently. The authenticated user becomes the owner of the created role.
Args:
role_name (str): The name of the role to create
user: The authenticated user creating the role
Returns:
JSONResponse: Success message indicating the role was created
Raises:
HTTPException: If there's an error creating the role
ValidationError: If the role name is invalid or already exists
"""
from cognee.modules.users.roles.methods import create_role as create_role_method
await create_role_method(role_name=role_name, owner_id=user.id)
@ -43,6 +81,26 @@ def get_permissions_router() -> APIRouter:
async def add_user_to_role(
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Add a user to a role.
This endpoint assigns a user to a specific role, granting them all the
permissions associated with that role. The authenticated user must be
the owner of the role or have appropriate administrative permissions.
Args:
user_id (UUID): The UUID of the user to add to the role
role_id (UUID): The UUID of the role to assign the user to
user: The authenticated user performing the role assignment
Returns:
JSONResponse: Success message indicating the user was added to the role
Raises:
HTTPException: If there's an error adding the user to the role
PermissionDeniedError: If the user doesn't have permission to assign roles
ValidationError: If the user or role doesn't exist
"""
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
@ -53,6 +111,26 @@ def get_permissions_router() -> APIRouter:
async def add_user_to_tenant(
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Add a user to a tenant.
This endpoint assigns a user to a specific tenant, allowing them to access
resources and data associated with that tenant. The authenticated user must
be the owner of the tenant or have appropriate administrative permissions.
Args:
user_id (UUID): The UUID of the user to add to the tenant
tenant_id (UUID): The UUID of the tenant to assign the user to
user: The authenticated user performing the tenant assignment
Returns:
JSONResponse: Success message indicating the user was added to the tenant
Raises:
HTTPException: If there's an error adding the user to the tenant
PermissionDeniedError: If the user doesn't have permission to assign tenants
ValidationError: If the user or tenant doesn't exist
"""
from cognee.modules.users.tenants.methods import add_user_to_tenant
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
@ -61,6 +139,24 @@ def get_permissions_router() -> APIRouter:
@permissions_router.post("/tenants")
async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
"""
Create a new tenant.
This endpoint creates a new tenant with the specified name. Tenants are used
to organize users and resources in multi-tenant environments, providing
isolation and access control between different groups or organizations.
Args:
tenant_name (str): The name of the tenant to create
user: The authenticated user creating the tenant
Returns:
JSONResponse: Success message indicating the tenant was created
Raises:
HTTPException: If there's an error creating the tenant
ValidationError: If the tenant name is invalid or already exists
"""
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
await create_tenant_method(tenant_name=tenant_name, user_id=user.id)

View file

@ -32,6 +32,22 @@ def get_search_router() -> APIRouter:
@router.get("", response_model=list[SearchHistoryItem])
async def get_search_history(user: User = Depends(get_authenticated_user)):
"""
Get search history for the authenticated user.
This endpoint retrieves the search history for the authenticated user,
returning a list of previously executed searches with their timestamps.
Returns:
List[SearchHistoryItem]: A list of search history items containing:
- id: Unique identifier for the search
- text: The search query text
- user: User who performed the search
- created_at: When the search was performed
Raises:
HTTPException: If there's an error retrieving the search history
"""
try:
history = await get_history(user.id, limit=0)
@ -41,7 +57,34 @@ def get_search_router() -> APIRouter:
@router.post("", response_model=list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for searching for nodes in the graph."""
"""
Search for nodes in the graph database.
This endpoint performs semantic search across the knowledge graph to find
relevant nodes based on the provided query. It supports different search
types and can be scoped to specific datasets.
Args:
payload (SearchPayloadDTO): Search parameters containing:
- search_type: Type of search to perform (SearchType)
- datasets: Optional list of dataset names to search within
- dataset_ids: Optional list of dataset UUIDs to search within
- query: The search query string
- top_k: Maximum number of results to return (default: 10)
user: The authenticated user performing the search
Returns:
List: A list of search results containing relevant nodes from the graph
Raises:
HTTPException: If there's an error during the search operation
PermissionDeniedError: If user doesn't have permission to search datasets
Note:
- Datasets sent by name will only map to datasets owned by the request sender
- To search datasets not owned by the request sender, dataset UUID is needed
- If permission is denied, returns empty list instead of error
"""
from cognee.api.v1.search import search as cognee_search
try:

View file

@ -48,6 +48,24 @@ def get_settings_router() -> APIRouter:
@router.get("", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
"""
Get the current system settings.
This endpoint retrieves the current configuration settings for the system,
including LLM (Large Language Model) configuration and vector database
configuration. These settings determine how the system processes and stores data.
Args:
user: The authenticated user requesting the settings
Returns:
SettingsDTO: The current system settings containing:
- llm: LLM configuration (provider, model, API key)
- vector_db: Vector database configuration (provider, URL, API key)
Raises:
HTTPException: If there's an error retrieving the settings
"""
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()
@ -56,6 +74,26 @@ def get_settings_router() -> APIRouter:
async def save_settings(
new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)
):
"""
Save or update system settings.
This endpoint allows updating the system configuration settings. You can
update either the LLM configuration, vector database configuration, or both.
Only provided settings will be updated; others remain unchanged.
Args:
new_settings (SettingsPayloadDTO): The settings to update containing:
- llm: Optional LLM configuration (provider, model, API key)
- vector_db: Optional vector database configuration (provider, URL, API key)
user: The authenticated user making the changes
Returns:
None: No content returned on successful save
Raises:
HTTPException: If there's an error saving the settings
ValidationError: If the provided settings are invalid
"""
from cognee.modules.settings import save_llm_config, save_vector_db_config
if new_settings.llm is not None:

View file

@ -16,7 +16,26 @@ def get_visualize_router() -> APIRouter:
@router.get("", response_model=None)
async def visualize(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for adding data to the graph."""
"""
Generate an HTML visualization of the dataset's knowledge graph.
This endpoint creates an interactive HTML visualization of the knowledge graph
for a specific dataset. The visualization displays nodes and edges representing
entities and their relationships, allowing users to explore the graph structure
visually. The user must have read permissions on the dataset.
Args:
dataset_id (UUID): The unique identifier of the dataset to visualize
user: The authenticated user requesting the visualization
Returns:
HTMLResponse: An HTML page containing the interactive graph visualization
Raises:
HTTPException: If there's an error generating the visualization
PermissionDeniedError: If the user doesn't have permission to read the dataset
DatasetNotFoundError: If the dataset doesn't exist
"""
from cognee.api.v1.visualize import visualize_graph
try: