refactor: simplify endpoint default values (#1123)
<!-- .github/pull_request_template.md --> ## Description Simplify Cognee endpoints so default dataset ID will be None ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
115585ee9c
commit
022c96de55
5 changed files with 19 additions and 21 deletions
|
|
@@ -14,7 +14,7 @@ async def add(
|
|||
node_set: Optional[List[str]] = None,
|
||||
vector_db_config: dict = None,
|
||||
graph_db_config: dict = None,
|
||||
dataset_id: UUID = None,
|
||||
dataset_id: Optional[UUID] = None,
|
||||
):
|
||||
"""
|
||||
Add data to Cognee for knowledge graph processing.
|
||||
|
|
|
|||
|
|
@@ -1,10 +1,10 @@
|
|||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import Form, UploadFile, Depends
|
||||
from fastapi import Form, File, UploadFile, Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi import APIRouter
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Union, Literal
|
||||
import subprocess
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
import requests
|
||||
|
|
@@ -20,9 +20,9 @@ def get_add_router() -> APIRouter:
|
|||
|
||||
@router.post("", response_model=dict)
|
||||
async def add(
|
||||
data: List[UploadFile],
|
||||
data: List[UploadFile] = File(default=None),
|
||||
datasetName: Optional[str] = Form(default=None),
|
||||
datasetId: Optional[UUID] = Form(default=None),
|
||||
datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
|
||||
user: User = Depends(get_authenticated_user),
|
||||
):
|
||||
"""
|
||||
|
|
@@ -38,7 +38,7 @@ def get_add_router() -> APIRouter:
|
|||
- GitHub repository URLs (will be cloned and processed)
|
||||
- Regular file uploads
|
||||
- **datasetName** (Optional[str]): Name of the dataset to add data to
|
||||
- **datasetId** (Optional[UUID]): UUID of the dataset to add data to
|
||||
- **datasetId** (Optional[UUID]): UUID of an already existing dataset
|
||||
|
||||
Either datasetName or datasetId must be provided.
|
||||
|
||||
|
|
@@ -58,6 +58,7 @@ def get_add_router() -> APIRouter:
|
|||
- GitHub repositories are cloned and all files are processed
|
||||
- HTTP URLs are fetched and their content is processed
|
||||
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
|
||||
- datasetId value can only be the UUID of an already existing dataset
|
||||
"""
|
||||
from cognee.api.v1.add import add as cognee_add
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,7 @@
|
|||
import os
|
||||
import asyncio
|
||||
from uuid import UUID
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from typing import List, Optional
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
|
||||
|
|
@@ -10,7 +10,6 @@ from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
|
|||
from cognee.api.DTO import InDTO
|
||||
from cognee.modules.pipelines.methods import get_pipeline_run
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.users.methods import get_authenticated_user
|
||||
from cognee.modules.users.get_user_db import get_user_db_context
|
||||
from cognee.modules.graph.methods import get_formatted_graph_data
|
||||
|
|
@@ -30,9 +29,9 @@ logger = get_logger("api.cognify")
|
|||
|
||||
|
||||
class CognifyPayloadDTO(InDTO):
|
||||
datasets: Optional[List[str]] = None
|
||||
dataset_ids: Optional[List[UUID]] = None
|
||||
run_in_background: Optional[bool] = False
|
||||
datasets: Optional[List[str]] = Field(default=None)
|
||||
dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
|
||||
run_in_background: Optional[bool] = Field(default=False)
|
||||
|
||||
|
||||
def get_cognify_router() -> APIRouter:
|
||||
|
|
@@ -57,8 +56,7 @@ def get_cognify_router() -> APIRouter:
|
|||
|
||||
## Request Parameters
|
||||
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
|
||||
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
|
||||
- **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
|
||||
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
|
||||
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
|
||||
|
||||
## Response
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,7 @@
|
|||
from uuid import UUID
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from pydantic import Field
|
||||
from fastapi import Depends, APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
from cognee.modules.search.types import SearchType
|
||||
|
|
@@ -14,11 +15,11 @@ from cognee.modules.users.methods import get_authenticated_user
|
|||
# Note: Datasets sent by name will only map to datasets owned by the request sender
|
||||
# To search for datasets not owned by the request sender dataset UUID is needed
|
||||
class SearchPayloadDTO(InDTO):
|
||||
search_type: SearchType
|
||||
datasets: Optional[list[str]] = None
|
||||
dataset_ids: Optional[list[UUID]] = None
|
||||
query: str
|
||||
top_k: Optional[int] = 10
|
||||
search_type: SearchType = Field(default=SearchType.GRAPH_COMPLETION)
|
||||
datasets: Optional[list[str]] = Field(default=None)
|
||||
dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
|
||||
query: str = Field(default="What is in the document?")
|
||||
top_k: Optional[int] = Field(default=10)
|
||||
|
||||
|
||||
def get_search_router() -> APIRouter:
|
||||
|
|
|
|||
|
|
@@ -12,9 +12,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None):
|
|||
return TextData(data)
|
||||
|
||||
if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
|
||||
return BinaryData(
|
||||
data, str(data.name).split("/")[-1] if hasattr(data, "name") else filename
|
||||
)
|
||||
return BinaryData(data, filename if filename else str(data.name).split("/")[-1])
|
||||
|
||||
try:
|
||||
from s3fs import S3File
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue