Merge branch 'dev' into limit_major_dep_version_change

This commit is contained in:
Igor Ilic 2025-07-14 15:34:43 +02:00 committed by GitHub
commit 3e458a66d8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 518 additions and 34 deletions

View file

@@ -25,7 +25,40 @@ def get_add_router() -> APIRouter:
datasetId: Optional[UUID] = Form(default=None),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for adding data to the graph."""
"""
Add data to a dataset for processing and knowledge graph construction.
This endpoint accepts various types of data (files, URLs, GitHub repositories)
and adds them to a specified dataset for processing. The data is ingested,
analyzed, and integrated into the knowledge graph.
## Request Parameters
- **data** (List[UploadFile]): List of files to upload. Can also include:
- HTTP URLs (if ALLOW_HTTP_REQUESTS is enabled)
- GitHub repository URLs (will be cloned and processed)
- Regular file uploads
- **datasetName** (Optional[str]): Name of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of the dataset to add data to
Either datasetName or datasetId must be provided.
## Response
Returns information about the add operation containing:
- Status of the operation
- Details about the processed data
- Any relevant metadata from the ingestion process
## Error Codes
- **400 Bad Request**: Neither datasetId nor datasetName provided
- **409 Conflict**: Error during add operation
- **403 Forbidden**: User doesn't have permission to add to dataset
## Notes
- To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
- GitHub repositories are cloned and all files are processed
- HTTP URLs are fetched and their content is processed
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
"""
from cognee.api.v1.add import add as cognee_add
if not datasetId and not datasetName:

View file

@@ -31,7 +31,22 @@ def get_code_pipeline_router() -> APIRouter:
@router.post("/index", response_model=None)
async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
"""This endpoint is responsible for running the indexation on code repo."""
"""
Run indexation on a code repository.
This endpoint processes a code repository to create a knowledge graph
of the codebase structure, dependencies, and relationships.
## Request Parameters
- **repo_path** (str): Path to the code repository
- **include_docs** (bool): Whether to include documentation files (default: false)
## Response
No content returned. Processing results are logged.
## Error Codes
- **409 Conflict**: Error during indexation process
"""
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
try:
@@ -42,7 +57,22 @@ def get_code_pipeline_router() -> APIRouter:
@router.post("/retrieve", response_model=list[dict])
async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO):
"""This endpoint is responsible for retrieving the context."""
"""
Retrieve context from the code knowledge graph.
This endpoint searches the indexed code repository to find relevant
context based on the provided query.
## Request Parameters
- **query** (str): Search query for code context
- **full_input** (str): Full input text for processing
## Response
Returns a list of relevant code files and context as JSON.
## Error Codes
- **409 Conflict**: Error during retrieval process
"""
try:
query = (
payload.full_input.replace("cognee ", "")

View file

@@ -41,7 +41,50 @@ def get_cognify_router() -> APIRouter:
@router.post("", response_model=dict)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for the cognitive processing of the content."""
"""
Transform datasets into structured knowledge graphs through cognitive processing.
This endpoint is the core of Cognee's intelligence layer, responsible for converting
raw text, documents, and data added through the add endpoint into semantic knowledge graphs.
It performs deep analysis to extract entities, relationships, and insights from ingested content.
## Processing Pipeline
1. Document classification and permission validation
2. Text chunking and semantic segmentation
3. Entity extraction using LLM-powered analysis
4. Relationship detection and graph construction
5. Vector embeddings generation for semantic search
6. Content summarization and indexing
## Request Parameters
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
## Response
- **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
- **Background execution**: Pipeline run metadata including pipeline_run_id for status monitoring via WebSocket subscription
## Error Codes
- **400 Bad Request**: When neither datasets nor dataset_ids are provided, or when specified datasets don't exist
- **409 Conflict**: When processing fails due to system errors, missing LLM API keys, database connection failures, or corrupted content
## Example Request
```json
{
"datasets": ["research_papers", "documentation"],
"run_in_background": false
}
```
## Notes
To cognify data in datasets not owned by the user and for which the current user has write permission,
the dataset_id must be used (when ENABLE_BACKEND_ACCESS_CONTROL is set to True).
## Next Steps
After successful processing, use the search endpoints to query the generated knowledge graph for insights, relationships, and semantic search.
"""
if not payload.datasets and not payload.dataset_ids:
return JSONResponse(
status_code=400, content={"error": "No datasets or dataset_ids provided"}

View file

@@ -74,6 +74,24 @@ def get_datasets_router() -> APIRouter:
@router.get("", response_model=list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
"""
Get all datasets accessible to the authenticated user.
This endpoint retrieves all datasets that the authenticated user has
read permissions for. The datasets are returned with their metadata
including ID, name, creation time, and owner information.
## Response
Returns a list of dataset objects containing:
- **id**: Unique dataset identifier
- **name**: Dataset name
- **created_at**: When the dataset was created
- **updated_at**: When the dataset was last updated
- **owner_id**: ID of the dataset owner
## Error Codes
- **418 I'm a teapot**: Error retrieving datasets
"""
try:
datasets = await get_all_user_permission_datasets(user, "read")
@@ -89,6 +107,29 @@ def get_datasets_router() -> APIRouter:
async def create_new_dataset(
dataset_data: DatasetCreationPayload, user: User = Depends(get_authenticated_user)
):
"""
Create a new dataset or return existing dataset with the same name.
This endpoint creates a new dataset with the specified name. If a dataset
with the same name already exists for the user, it returns the existing
dataset instead of creating a duplicate. The user is automatically granted
all permissions (read, write, share, delete) on the created dataset.
## Request Parameters
- **dataset_data** (DatasetCreationPayload): Dataset creation parameters containing:
- **name**: The name for the new dataset
## Response
Returns the created or existing dataset object containing:
- **id**: Unique dataset identifier
- **name**: Dataset name
- **created_at**: When the dataset was created
- **updated_at**: When the dataset was last updated
- **owner_id**: ID of the dataset owner
## Error Codes
- **418 I'm a teapot**: Error creating dataset
"""
try:
datasets = await get_datasets_by_name([dataset_data.name], user.id)
@@ -118,6 +159,22 @@ def get_datasets_router() -> APIRouter:
"/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}}
)
async def delete_dataset(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""
Delete a dataset by its ID.
This endpoint permanently deletes a dataset and all its associated data.
The user must have delete permissions on the dataset to perform this operation.
## Path Parameters
- **dataset_id** (UUID): The unique identifier of the dataset to delete
## Response
No content returned on successful deletion.
## Error Codes
- **404 Not Found**: Dataset doesn't exist or user doesn't have access
- **500 Internal Server Error**: Error during deletion
"""
from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = await get_dataset(user.id, dataset_id)
@@ -135,6 +192,24 @@ def get_datasets_router() -> APIRouter:
async def delete_data(
dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Delete a specific data item from a dataset.
This endpoint removes a specific data item from a dataset while keeping
the dataset itself intact. The user must have delete permissions on the
dataset to perform this operation.
## Path Parameters
- **dataset_id** (UUID): The unique identifier of the dataset containing the data
- **data_id** (UUID): The unique identifier of the data item to delete
## Response
No content returned on successful deletion.
## Error Codes
- **404 Not Found**: Dataset or data item doesn't exist, or user doesn't have access
- **500 Internal Server Error**: Error during deletion
"""
from cognee.modules.data.methods import get_data, delete_data
from cognee.modules.data.methods import get_dataset
@@ -153,22 +228,35 @@ def get_datasets_router() -> APIRouter:
@router.get("/{dataset_id}/graph", response_model=GraphDTO)
async def get_dataset_graph(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
try:
from cognee.modules.data.methods import get_dataset
"""
Get the knowledge graph visualization for a dataset.
dataset = await get_dataset(user.id, dataset_id)
This endpoint retrieves the knowledge graph data for a specific dataset,
including nodes and edges that represent the relationships between entities
in the dataset. The graph data is formatted for visualization purposes.
formatted_graph_data = await get_formatted_graph_data(dataset.id, user.id)
## Path Parameters
- **dataset_id** (UUID): The unique identifier of the dataset
return JSONResponse(
status_code=200,
content=formatted_graph_data,
)
except Exception:
return JSONResponse(
status_code=409,
content="Error retrieving dataset graph data.",
)
## Response
Returns the graph data containing:
- **nodes**: List of graph nodes with id, label, and properties
- **edges**: List of graph edges with source, target, and label
## Error Codes
- **404 Not Found**: Dataset doesn't exist or user doesn't have access
- **500 Internal Server Error**: Error retrieving graph data
"""
from cognee.modules.data.methods import get_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
graph_data = await get_formatted_graph_data(dataset)
return graph_data
@router.get(
"/{dataset_id}/data",
@@ -176,6 +264,30 @@ def get_datasets_router() -> APIRouter:
responses={404: {"model": ErrorResponseDTO}},
)
async def get_dataset_data(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""
Get all data items in a dataset.
This endpoint retrieves all data items (documents, files, etc.) that belong
to a specific dataset. Each data item includes metadata such as name, type,
creation time, and storage location.
## Path Parameters
- **dataset_id** (UUID): The unique identifier of the dataset
## Response
Returns a list of data objects containing:
- **id**: Unique data item identifier
- **name**: Data item name
- **created_at**: When the data was added
- **updated_at**: When the data was last updated
- **extension**: File extension
- **mime_type**: MIME type of the data
- **raw_data_location**: Storage location of the raw data
## Error Codes
- **404 Not Found**: Dataset doesn't exist or user doesn't have access
- **500 Internal Server Error**: Error retrieving data
"""
from cognee.modules.data.methods import get_dataset_data, get_dataset
# Verify user has permission to read dataset
@@ -199,22 +311,54 @@ def get_datasets_router() -> APIRouter:
datasets: Annotated[List[UUID], Query(alias="dataset")] = None,
user: User = Depends(get_authenticated_user),
):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
"""
Get the processing status of datasets.
try:
# Verify user has permission to read dataset
await get_authorized_existing_datasets(datasets, "read", user)
This endpoint retrieves the current processing status of one or more datasets,
indicating whether they are being processed, have completed processing, or
encountered errors during pipeline execution.
datasets_statuses = await cognee_datasets.get_status(datasets)
## Query Parameters
- **dataset** (List[UUID]): List of dataset UUIDs to check status for
return datasets_statuses
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
## Response
Returns a dictionary mapping dataset IDs to their processing status:
- **pending**: Dataset is queued for processing
- **running**: Dataset is currently being processed
- **completed**: Dataset processing completed successfully
- **failed**: Dataset processing encountered an error
## Error Codes
- **500 Internal Server Error**: Error retrieving status information
"""
from cognee.modules.data.methods import get_dataset_status
dataset_status = await get_dataset_status(datasets, user.id)
return dataset_status
@router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
async def get_raw_data(
dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Download the raw data file for a specific data item.
This endpoint allows users to download the original, unprocessed data file
for a specific data item within a dataset. The file is returned as a direct
download with appropriate headers.
## Path Parameters
- **dataset_id** (UUID): The unique identifier of the dataset containing the data
- **data_id** (UUID): The unique identifier of the data item to download
## Response
Returns the raw data file as a downloadable response.
## Error Codes
- **404 Not Found**: Dataset or data item doesn't exist, or user doesn't have access
- **500 Internal Server Error**: Error accessing the raw data file
"""
from cognee.modules.data.methods import get_data
from cognee.modules.data.methods import get_dataset_data

View file

@@ -24,13 +24,29 @@ def get_delete_router() -> APIRouter:
mode: str = Form("soft"),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for deleting data from the graph.
"""
Delete data from the knowledge graph.
Args:
data: The data to delete (files, URLs, or text)
dataset_name: Name of the dataset to delete from (default: "main_dataset")
mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
user: Authenticated user
This endpoint removes specified data from the knowledge graph. It supports
both soft deletion (preserving related entities) and hard deletion (removing
degree-one entity nodes as well).
## Request Parameters
- **data** (List[UploadFile]): The data to delete (files, URLs, or text)
- **dataset_name** (str): Name of the dataset to delete from (default: "main_dataset")
- **dataset_id** (UUID): UUID of the dataset to delete from
- **mode** (str): Deletion mode - "soft" (default) or "hard"
## Response
No content returned on successful deletion.
## Error Codes
- **409 Conflict**: Error during deletion process
- **403 Forbidden**: User doesn't have permission to delete from dataset
## Notes
- **Soft mode**: Preserves related entities and relationships
- **Hard mode**: Also deletes degree-one entity nodes
"""
from cognee.api.v1.delete import delete as cognee_delete

View file

@@ -18,6 +18,28 @@ def get_permissions_router() -> APIRouter:
principal_id: UUID,
user: User = Depends(get_authenticated_user),
):
"""
Grant permission on datasets to a principal (user or role).
This endpoint allows granting specific permissions on one or more datasets
to a principal (which can be a user or role). The authenticated user must
have appropriate permissions to grant access to the specified datasets.
## Path Parameters
- **principal_id** (UUID): The UUID of the principal (user or role) to grant permission to
## Request Parameters
- **permission_name** (str): The name of the permission to grant (e.g., "read", "write", "delete")
- **dataset_ids** (List[UUID]): List of dataset UUIDs to grant permission on
## Response
Returns a success message indicating permission was assigned.
## Error Codes
- **400 Bad Request**: Invalid request parameters
- **403 Forbidden**: User doesn't have permission to grant access
- **500 Internal Server Error**: Error granting permission
"""
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
await authorized_give_permission_on_datasets(
@@ -33,6 +55,23 @@ def get_permissions_router() -> APIRouter:
@permissions_router.post("/roles")
async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
"""
Create a new role.
This endpoint creates a new role with the specified name. Roles are used
to group permissions and can be assigned to users to manage access control
more efficiently. The authenticated user becomes the owner of the created role.
## Request Parameters
- **role_name** (str): The name of the role to create
## Response
Returns a success message indicating the role was created.
## Error Codes
- **400 Bad Request**: Invalid role name or role already exists
- **500 Internal Server Error**: Error creating the role
"""
from cognee.modules.users.roles.methods import create_role as create_role_method
await create_role_method(role_name=role_name, owner_id=user.id)
@@ -43,6 +82,28 @@ def get_permissions_router() -> APIRouter:
async def add_user_to_role(
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Add a user to a role.
This endpoint assigns a user to a specific role, granting them all the
permissions associated with that role. The authenticated user must be
the owner of the role or have appropriate administrative permissions.
## Path Parameters
- **user_id** (UUID): The UUID of the user to add to the role
## Request Parameters
- **role_id** (UUID): The UUID of the role to assign the user to
## Response
Returns a success message indicating the user was added to the role.
## Error Codes
- **400 Bad Request**: Invalid user or role ID
- **403 Forbidden**: User doesn't have permission to assign roles
- **404 Not Found**: User or role doesn't exist
- **500 Internal Server Error**: Error adding user to role
"""
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
@@ -53,6 +114,28 @@ def get_permissions_router() -> APIRouter:
async def add_user_to_tenant(
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
):
"""
Add a user to a tenant.
This endpoint assigns a user to a specific tenant, allowing them to access
resources and data associated with that tenant. The authenticated user must
be the owner of the tenant or have appropriate administrative permissions.
## Path Parameters
- **user_id** (UUID): The UUID of the user to add to the tenant
## Request Parameters
- **tenant_id** (UUID): The UUID of the tenant to assign the user to
## Response
Returns a success message indicating the user was added to the tenant.
## Error Codes
- **400 Bad Request**: Invalid user or tenant ID
- **403 Forbidden**: User doesn't have permission to assign tenants
- **404 Not Found**: User or tenant doesn't exist
- **500 Internal Server Error**: Error adding user to tenant
"""
from cognee.modules.users.tenants.methods import add_user_to_tenant
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
@@ -61,6 +144,23 @@ def get_permissions_router() -> APIRouter:
@permissions_router.post("/tenants")
async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
"""
Create a new tenant.
This endpoint creates a new tenant with the specified name. Tenants are used
to organize users and resources in multi-tenant environments, providing
isolation and access control between different groups or organizations.
## Request Parameters
- **tenant_name** (str): The name of the tenant to create
## Response
Returns a success message indicating the tenant was created.
## Error Codes
- **400 Bad Request**: Invalid tenant name or tenant already exists
- **500 Internal Server Error**: Error creating the tenant
"""
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
await create_tenant_method(tenant_name=tenant_name, user_id=user.id)

View file

@@ -74,7 +74,29 @@ def get_responses_router() -> APIRouter:
user: User = Depends(get_authenticated_user),
) -> ResponseBody:
"""
OpenAI-compatible responses endpoint with function calling support
OpenAI-compatible responses endpoint with function calling support.
This endpoint provides OpenAI-compatible API responses with integrated
function calling capabilities for Cognee operations.
## Request Parameters
- **input** (str): The input text to process
- **model** (str): The model to use for processing
- **tools** (Optional[List[Dict]]): Available tools for function calling
- **tool_choice** (Any): Tool selection strategy (default: "auto")
- **temperature** (float): Response randomness (default: 1.0)
## Response
Returns an OpenAI-compatible response body with function call results.
## Error Codes
- **400 Bad Request**: Invalid request parameters
- **500 Internal Server Error**: Error processing request
## Notes
- Compatible with OpenAI API format
- Supports function calling with Cognee tools
- Uses default tools if none provided
"""
# Use default tools if none provided
tools = request.tools or DEFAULT_TOOLS

View file

@@ -32,6 +32,22 @@ def get_search_router() -> APIRouter:
@router.get("", response_model=list[SearchHistoryItem])
async def get_search_history(user: User = Depends(get_authenticated_user)):
"""
Get search history for the authenticated user.
This endpoint retrieves the search history for the authenticated user,
returning a list of previously executed searches with their timestamps.
## Response
Returns a list of search history items containing:
- **id**: Unique identifier for the search
- **text**: The search query text
- **user**: User who performed the search
- **created_at**: When the search was performed
## Error Codes
- **500 Internal Server Error**: Error retrieving search history
"""
try:
history = await get_history(user.id, limit=0)
@@ -41,7 +57,32 @@ def get_search_router() -> APIRouter:
@router.post("", response_model=list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for searching for nodes in the graph."""
"""
Search for nodes in the graph database.
This endpoint performs semantic search across the knowledge graph to find
relevant nodes based on the provided query. It supports different search
types and can be scoped to specific datasets.
## Request Parameters
- **search_type** (SearchType): Type of search to perform
- **datasets** (Optional[List[str]]): List of dataset names to search within
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to search within
- **query** (str): The search query string
- **top_k** (Optional[int]): Maximum number of results to return (default: 10)
## Response
Returns a list of search results containing relevant nodes from the graph.
## Error Codes
- **409 Conflict**: Error during search operation
- **403 Forbidden**: User doesn't have permission to search datasets (returns empty list)
## Notes
- Datasets sent by name will only map to datasets owned by the request sender
- To search datasets not owned by the request sender, dataset UUID is needed
- If permission is denied, returns empty list instead of error
"""
from cognee.api.v1.search import search as cognee_search
try:

View file

@@ -48,6 +48,21 @@ def get_settings_router() -> APIRouter:
@router.get("", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
"""
Get the current system settings.
This endpoint retrieves the current configuration settings for the system,
including LLM (Large Language Model) configuration and vector database
configuration. These settings determine how the system processes and stores data.
## Response
Returns the current system settings containing:
- **llm**: LLM configuration (provider, model, API key)
- **vector_db**: Vector database configuration (provider, URL, API key)
## Error Codes
- **500 Internal Server Error**: Error retrieving settings
"""
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()
@@ -56,6 +71,24 @@ def get_settings_router() -> APIRouter:
async def save_settings(
new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)
):
"""
Save or update system settings.
This endpoint allows updating the system configuration settings. You can
update either the LLM configuration, vector database configuration, or both.
Only provided settings will be updated; others remain unchanged.
## Request Parameters
- **llm** (Optional[LLMConfigInputDTO]): LLM configuration (provider, model, API key)
- **vector_db** (Optional[VectorDBConfigInputDTO]): Vector database configuration (provider, URL, API key)
## Response
No content returned on successful save.
## Error Codes
- **400 Bad Request**: Invalid settings provided
- **500 Internal Server Error**: Error saving settings
"""
from cognee.modules.settings import save_llm_config, save_vector_db_config
if new_settings.llm is not None:

View file

@@ -16,7 +16,29 @@ def get_visualize_router() -> APIRouter:
@router.get("", response_model=None)
async def visualize(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
"""This endpoint is responsible for adding data to the graph."""
"""
Generate an HTML visualization of the dataset's knowledge graph.
This endpoint creates an interactive HTML visualization of the knowledge graph
for a specific dataset. The visualization displays nodes and edges representing
entities and their relationships, allowing users to explore the graph structure
visually.
## Query Parameters
- **dataset_id** (UUID): The unique identifier of the dataset to visualize
## Response
Returns an HTML page containing the interactive graph visualization.
## Error Codes
- **404 Not Found**: Dataset doesn't exist
- **403 Forbidden**: User doesn't have permission to read the dataset
- **500 Internal Server Error**: Error generating visualization
## Notes
- User must have read permissions on the dataset
- Visualization is interactive and allows graph exploration
"""
from cognee.api.v1.visualize import visualize_graph
try: