244 lines
9.6 KiB
Python
244 lines
9.6 KiB
Python
from uuid import UUID
|
|
from typing import Optional, List
|
|
from fastapi import APIRouter, Depends
|
|
from fastapi.responses import JSONResponse
|
|
|
|
|
|
from cognee.api.DTO import InDTO
|
|
from cognee.modules.users.models import User
|
|
from cognee.modules.users.methods import get_authenticated_user
|
|
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
|
from cognee.modules.sync.methods import get_running_sync_operations_for_user, get_sync_operation
|
|
from cognee.shared.utils import send_telemetry
|
|
from cognee.shared.logging_utils import get_logger
|
|
from cognee.api.v1.sync import SyncResponse
|
|
from cognee import __version__ as cognee_version
|
|
from cognee.context_global_variables import set_database_global_context_variables
|
|
|
|
logger = get_logger()
|
|
|
|
|
|
class SyncRequest(InDTO):
|
|
"""Request model for sync operations."""
|
|
|
|
dataset_ids: Optional[List[UUID]] = None
|
|
|
|
|
|
def get_sync_router() -> APIRouter:
|
|
router = APIRouter()
|
|
|
|
@router.post("", response_model=dict[str, SyncResponse])
|
|
async def sync_to_cloud(
|
|
request: SyncRequest,
|
|
user: User = Depends(get_authenticated_user),
|
|
):
|
|
"""
|
|
Sync local data to Cognee Cloud.
|
|
|
|
This endpoint triggers synchronization of local Cognee data to your cloud instance.
|
|
It uploads your local datasets, knowledge graphs, and processed data to the cloud
|
|
for backup, sharing, or cloud-based processing.
|
|
|
|
## Request Body (JSON)
|
|
```json
|
|
{
|
|
"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]
|
|
}
|
|
```
|
|
|
|
## Response
|
|
Returns immediate response for the sync operation:
|
|
- **run_id**: Unique identifier for tracking the background sync operation
|
|
- **status**: Always "started" (operation runs in background)
|
|
- **dataset_ids**: List of dataset IDs being synced
|
|
- **dataset_names**: List of dataset names being synced
|
|
- **message**: Description of the background operation
|
|
- **timestamp**: When the sync was initiated
|
|
- **user_id**: User who initiated the sync
|
|
|
|
## Cloud Sync Features
|
|
- **Automatic Authentication**: Uses your Cognee Cloud credentials
|
|
- **Data Compression**: Optimizes transfer size for faster uploads
|
|
- **Smart Sync**: Automatically handles data updates efficiently
|
|
- **Progress Tracking**: Monitor sync status with sync_id
|
|
- **Error Recovery**: Automatic retry for failed transfers
|
|
- **Data Validation**: Ensures data integrity during transfer
|
|
|
|
## Example Usage
|
|
```bash
|
|
# Sync multiple datasets to cloud by IDs (JSON request)
|
|
curl -X POST "http://localhost:8000/api/v1/sync" \\
|
|
-H "Content-Type: application/json" \\
|
|
-H "Cookie: auth_token=your-token" \\
|
|
-d '{"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]}'
|
|
|
|
# Sync all user datasets (empty request body or null dataset_ids)
|
|
curl -X POST "http://localhost:8000/api/v1/sync" \\
|
|
-H "Content-Type: application/json" \\
|
|
-H "Cookie: auth_token=your-token" \\
|
|
-d '{}'
|
|
```
|
|
|
|
## Error Codes
|
|
- **400 Bad Request**: Invalid dataset_ids format
|
|
- **401 Unauthorized**: Invalid or missing authentication
|
|
- **403 Forbidden**: User doesn't have permission to access dataset
|
|
- **404 Not Found**: Dataset not found
|
|
- **409 Conflict**: Sync operation conflict or cloud service unavailable
|
|
- **413 Payload Too Large**: Dataset too large for current cloud plan
|
|
- **429 Too Many Requests**: Rate limit exceeded
|
|
|
|
## Notes
|
|
- Sync operations run in the background - you get an immediate response
|
|
- Use the returned run_id to track progress (status API coming soon)
|
|
- Large datasets are automatically chunked for efficient transfer
|
|
- Cloud storage usage counts against your plan limits
|
|
- The sync will continue even if you close your connection
|
|
"""
|
|
send_telemetry(
|
|
"Cloud Sync API Endpoint Invoked",
|
|
user.id,
|
|
additional_properties={
|
|
"endpoint": "POST /v1/sync",
|
|
"cognee_version": cognee_version,
|
|
"dataset_ids": [str(id) for id in request.dataset_ids]
|
|
if request.dataset_ids
|
|
else "*",
|
|
},
|
|
)
|
|
|
|
from cognee.api.v1.sync import sync as cognee_sync
|
|
|
|
try:
|
|
# Check if user has any running sync operations
|
|
running_syncs = await get_running_sync_operations_for_user(user.id)
|
|
if running_syncs:
|
|
# Return information about the existing sync operation
|
|
existing_sync = running_syncs[0] # Get the most recent running sync
|
|
return JSONResponse(
|
|
status_code=409,
|
|
content={
|
|
"error": "Sync operation already in progress",
|
|
"details": {
|
|
"run_id": existing_sync.run_id,
|
|
"status": "already_running",
|
|
"dataset_ids": existing_sync.dataset_ids,
|
|
"dataset_names": existing_sync.dataset_names,
|
|
"message": f"You have a sync operation already in progress with run_id '{existing_sync.run_id}'. Use the status endpoint to monitor progress, or wait for it to complete before starting a new sync.",
|
|
"timestamp": existing_sync.created_at.isoformat(),
|
|
"progress_percentage": existing_sync.progress_percentage,
|
|
},
|
|
},
|
|
)
|
|
|
|
# Retrieve existing dataset and check permissions
|
|
datasets = await get_specific_user_permission_datasets(
|
|
user.id, "write", request.dataset_ids if request.dataset_ids else None
|
|
)
|
|
|
|
# Execute new cloud sync operation for all datasets
|
|
sync_result = await cognee_sync(
|
|
datasets=datasets,
|
|
user=user,
|
|
)
|
|
|
|
return sync_result
|
|
|
|
except ValueError as e:
|
|
return JSONResponse(status_code=400, content={"error": str(e)})
|
|
except PermissionError as e:
|
|
return JSONResponse(status_code=403, content={"error": str(e)})
|
|
except ConnectionError as e:
|
|
return JSONResponse(
|
|
status_code=409, content={"error": f"Cloud service unavailable: {str(e)}"}
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"Cloud sync operation failed: {str(e)}")
|
|
return JSONResponse(status_code=409, content={"error": "Cloud sync operation failed."})
|
|
|
|
@router.get("/status")
|
|
async def get_sync_status_overview(
|
|
user: User = Depends(get_authenticated_user),
|
|
):
|
|
"""
|
|
Check if there are any running sync operations for the current user.
|
|
|
|
This endpoint provides a simple check to see if the user has any active sync operations
|
|
without needing to know specific run IDs.
|
|
|
|
## Response
|
|
Returns a simple status overview:
|
|
- **has_running_sync**: Boolean indicating if there are any running syncs
|
|
- **running_sync_count**: Number of currently running sync operations
|
|
- **latest_running_sync** (optional): Information about the most recent running sync if any exists
|
|
|
|
## Example Usage
|
|
```bash
|
|
curl -X GET "http://localhost:8000/api/v1/sync/status" \\
|
|
-H "Cookie: auth_token=your-token"
|
|
```
|
|
|
|
## Example Responses
|
|
|
|
**No running syncs:**
|
|
```json
|
|
{
|
|
"has_running_sync": false,
|
|
"running_sync_count": 0
|
|
}
|
|
```
|
|
|
|
**With running sync:**
|
|
```json
|
|
{
|
|
"has_running_sync": true,
|
|
"running_sync_count": 1,
|
|
"latest_running_sync": {
|
|
"run_id": "12345678-1234-5678-9012-123456789012",
|
|
"dataset_name": "My Dataset",
|
|
"progress_percentage": 45,
|
|
"created_at": "2025-01-01T00:00:00Z"
|
|
}
|
|
}
|
|
```
|
|
"""
|
|
send_telemetry(
|
|
"Sync Status Overview API Endpoint Invoked",
|
|
user.id,
|
|
additional_properties={
|
|
"endpoint": "GET /v1/sync/status",
|
|
"cognee_version": cognee_version,
|
|
},
|
|
)
|
|
|
|
try:
|
|
# Get any running sync operations for the user
|
|
running_syncs = await get_running_sync_operations_for_user(user.id)
|
|
|
|
response = {
|
|
"has_running_sync": len(running_syncs) > 0,
|
|
"running_sync_count": len(running_syncs),
|
|
}
|
|
|
|
# If there are running syncs, include info about the latest one
|
|
if running_syncs:
|
|
latest_sync = running_syncs[0] # Already ordered by created_at desc
|
|
response["latest_running_sync"] = {
|
|
"run_id": latest_sync.run_id,
|
|
"dataset_ids": latest_sync.dataset_ids,
|
|
"dataset_names": latest_sync.dataset_names,
|
|
"progress_percentage": latest_sync.progress_percentage,
|
|
"created_at": latest_sync.created_at.isoformat()
|
|
if latest_sync.created_at
|
|
else None,
|
|
}
|
|
|
|
return response
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to get sync status overview: {str(e)}")
|
|
return JSONResponse(
|
|
status_code=500, content={"error": "Failed to get sync status overview"}
|
|
)
|
|
|
|
return router
|