cognee/cognitive_architecture/api/v1/add/add_standalone.py
2024-03-12 22:55:56 +01:00

53 lines
1.5 KiB
Python

import asyncio
from uuid import UUID, uuid4
from typing import Union, BinaryIO, List
import cognitive_architecture.modules.ingestion as ingestion
from cognitive_architecture.infrastructure import infrastructure_config
class DatasetException(Exception):
    """Raised when data handed to the add pipeline is missing or unusable.

    Attributes:
        message: Human-readable description of what went wrong.
    """

    message: str

    def __init__(self, message: str):
        # Forward the message to Exception so that ``args`` and ``str(exc)``
        # are populated even when the exception is built with a keyword
        # argument (``DatasetException(message="...")``).
        super().__init__(message)
        self.message = message
async def add_standalone(
    data: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
    dataset_id: Union[UUID, None] = None,
    dataset_name: Union[str, None] = None,
):
    """Classify, identify and save ``data`` into a dataset.

    Args:
        data: A text string, a ``file://`` path string, an open binary file,
            or a list containing any mix of those.
        dataset_id: Identifier of the target dataset. When omitted, a fresh
            UUID is generated for this call (and shared by every item of a
            list passed in the same call).
        dataset_name: Optional dataset name, forwarded to ``ingestion.save``.

    Returns:
        ``dataset_id`` for a single item, or a list with one result per
        element when ``data`` is a list.

    Raises:
        DatasetException: If ``data`` is empty or otherwise falsy.
    """
    # Resolve the default per call. A ``uuid4()`` default in the signature is
    # evaluated only once, at definition time, so every caller would silently
    # share the same dataset id.
    if dataset_id is None:
        dataset_id = uuid4()

    db_engine = infrastructure_config.get_config()["database_engine"]
    if db_engine.is_db_done is not True:
        await db_engine.ensure_tables()

    if not data:
        raise DatasetException("Data must be provided to cognee.add(data: str)")

    if isinstance(data, list):
        # Process all items concurrently; they all land in the same dataset.
        return await asyncio.gather(
            *(add_standalone(item, dataset_id, dataset_name) for item in data)
        )

    if is_data_path(data):
        # Strip only the leading scheme; a later "file://" inside the path
        # must be left untouched.
        file_path = data[len("file://"):]
        with open(file_path, "rb") as file:
            return await add_standalone(file, dataset_id, dataset_name)

    classified_data = ingestion.classify(data)
    data_id = ingestion.identify(classified_data)
    await ingestion.save(dataset_id, dataset_name, data_id, classified_data)
    # NOTE: the ingestion.vectorize step is currently disabled in this path.
    return dataset_id
def is_data_path(data: str) -> bool:
    """Return True when ``data`` is a string that starts with ``file://``.

    Any non-string value (bytes, file objects, ``None``, ...) yields False.
    """
    if isinstance(data, str):
        return data.startswith("file://")
    return False