Compare commits
Comparing main...split-add- (1 commit)
| Author | SHA1 | Date |
|---|---|---|
|  | ae242de7f6 |  |

1 changed file with 58 additions and 29 deletions
@@ -5,7 +5,6 @@ from fastapi import Form, UploadFile, Depends
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter
 from typing import List, Optional
-import subprocess
 from cognee.shared.logging_utils import get_logger
 import requests
 
@@ -28,15 +27,58 @@ def get_add_router() -> APIRouter:
         """
         Add data to a dataset for processing and knowledge graph construction.
 
-        This endpoint accepts various types of data (files, URLs, GitHub repositories)
-        and adds them to a specified dataset for processing. The data is ingested,
-        analyzed, and integrated into the knowledge graph.
+        This endpoint accepts various types of data files and adds them to a specified dataset for processing.
+        The data is ingested, analyzed, and integrated into the knowledge graph.
 
         ## Request Parameters
-        - **data** (List[UploadFile]): List of files to upload. Can also include:
-          - HTTP URLs (if ALLOW_HTTP_REQUESTS is enabled)
-          - GitHub repository URLs (will be cloned and processed)
-          - Regular file uploads
+        - **data** (List[UploadFile]): List of files to upload. Regular file uploads.
+        - **datasetName** (Optional[str]): Name of the dataset to add data to
+        - **datasetId** (Optional[UUID]): UUID of the dataset to add data to
+
+        Either datasetName or datasetId must be provided.
+
+        ## Response
+        Returns information about the add operation containing:
+        - Status of the operation
+        - Details about the processed data
+        - Any relevant metadata from the ingestion process
+
+        ## Error Codes
+        - **400 Bad Request**: Neither datasetId nor datasetName provided
+        - **409 Conflict**: Error during add operation
+        - **403 Forbidden**: User doesn't have permission to add to dataset
+
+        ## Notes
+        - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+
+        """
+        from cognee.api.v1.add import add as cognee_add
+
+        if not datasetId and not datasetName:
+            raise ValueError("Either datasetId or datasetName must be provided.")
+
+        try:
+            add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
+            return add_run.model_dump()
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    @router.post("/text", response_model=dict)
+    async def add_text(
+        text_data: List[str] = Form(description="Plain-text data"),
+        datasetName: Optional[str] = Form(default=None),
+        datasetId: Optional[UUID] = Form(default=None),
+        user: User = Depends(get_authenticated_user),
+    ):
+        """
+        Add text data to a dataset for processing and knowledge graph construction.
+
+        This endpoint accepts only text and adds it to a specified dataset for processing. The text is ingested,
+        analyzed, and integrated into the knowledge graph.
+
+        ## Request Parameters
+        - **text_data** (List[str]): List of text to process. Can also include:
+          HTTP URLs (if ALLOW_HTTP_REQUESTS is enabled)
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of the dataset to add data to
 
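For orientation, here is a minimal client-side sketch of how the two endpoints above might be called once this change lands. The form field names (data, text_data, datasetName, datasetId) come from the diff; the base URL, the mount path, and the bearer-token auth header are assumptions made only for illustration.

```python
import requests

BASE = "http://localhost:8000/api/v1/add"  # assumed mount path, not shown in this diff
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder auth, scheme not shown in this diff

# File upload to the existing add endpoint: multipart files plus form fields.
with open("notes.pdf", "rb") as f:
    resp = requests.post(
        BASE,
        headers=HEADERS,
        files=[("data", ("notes.pdf", f, "application/pdf"))],
        data={"datasetName": "my-dataset"},  # or datasetId=<UUID>
    )
resp.raise_for_status()
print(resp.json())

# Plain text (or an HTTP URL, if ALLOW_HTTP_REQUESTS is enabled) to the new /text endpoint.
resp = requests.post(
    f"{BASE}/text",
    headers=HEADERS,
    data={
        "text_data": ["Cognee builds knowledge graphs from raw data."],
        "datasetName": "my-dataset",
    },
)
resp.raise_for_status()
print(resp.json())
```

Per the diff, both handlers return the serialized add_run payload on success and a 409 JSON error body when the add operation fails.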
@@ -55,7 +97,6 @@ def get_add_router() -> APIRouter:
 
         ## Notes
         - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
-        - GitHub repositories are cloned and all files are processed
         - HTTP URLs are fetched and their content is processed
         - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
         """
@@ -66,30 +107,18 @@ def get_add_router() -> APIRouter:
 
         try:
             if (
-                isinstance(data, str)
-                and data.startswith("http")
+                isinstance(text_data, str)
+                and text_data.startswith("http")
                 and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
             ):
-                if "github" in data:
-                    # Perform git clone if the URL is from GitHub
-                    repo_name = data.split("/")[-1].replace(".git", "")
-                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
-                    # TODO: Update add call with dataset info
-                    await cognee_add(
-                        "data://.data/",
-                        f"{repo_name}",
-                    )
-                else:
-                    # Fetch and store the data from other types of URL using curl
-                    response = requests.get(data)
-                    response.raise_for_status()
-
+                # Fetch and store the data from other types of URL using curl
+                response = requests.get(text_data)
+                response.raise_for_status()
                 file_data = await response.content()
-                # TODO: Update add call with dataset info
-                return await cognee_add(file_data)
+                return await cognee_add(file_data, datasetName, user=user, dataset_id=datasetId)
             else:
-                add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
+                add_run = await cognee_add(text_data, datasetName, user=user, dataset_id=datasetId)
 
             return add_run.model_dump()
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
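The URL branch in this hunk replaces the old GitHub-clone path with a plain HTTP fetch gated by ALLOW_HTTP_REQUESTS. A rough standalone sketch of that gating logic follows; the helper name and the 30-second timeout are assumptions for illustration, and the synchronous requests library exposes the fetched body as the bytes attribute response.content.

```python
import os

import requests


def fetch_url_payload(text: str) -> bytes | None:
    """Return the raw body of an http(s) URL, or None when the input is not a
    URL or when URL fetching is disabled via ALLOW_HTTP_REQUESTS."""
    allow_http = os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true"
    if not (isinstance(text, str) and text.startswith("http") and allow_http):
        return None
    response = requests.get(text, timeout=30)  # timeout is an illustrative choice
    response.raise_for_status()
    return response.content  # bytes payload of the fetched resource
```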