cognee/cognee/api/v1/add/add.py
2025-04-07 20:33:34 +02:00

48 lines
1.8 KiB
Python

from typing import Union, BinaryIO, List, Optional
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines import run_tasks, Task
from cognee.tasks.ingestion import ingest_data, resolve_data_directories
from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
)
from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
)
from uuid import uuid5, NAMESPACE_OID
async def add(
    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
    dataset_name: str = "main_dataset",
    user: User = None,
    node_set: Optional[List[str]] = None,
):
    """Run the "add_pipeline" tasks to ingest ``data`` into a dataset.

    Steps, in order:

    1. Await the relational and pgvector ``create_db_and_tables`` helpers.
    2. While the ``add.first_run`` flag is truthy, await the LLM and
       embedding connection tests, then clear the flag. The flag is only
       cleared after both tests return, so a failing test is retried on the
       next call.
    3. Fall back to ``get_default_user()`` when no user is supplied.
    4. Run ``resolve_data_directories`` followed by ``ingest_data`` through
       ``run_tasks`` and print every status object the pipeline yields.

    Args:
        data: A binary stream, a string, or a list of either; handed to
            ``run_tasks`` unchanged.
        dataset_name: Name of the target dataset. The dataset id is derived
            from it with ``uuid5(NAMESPACE_OID, dataset_name)``, so the same
            name always yields the same id.
        user: User passed to ``ingest_data``; ``None`` selects the result of
            ``get_default_user()``.
        node_set: Optional list of strings forwarded to ``ingest_data``.

    Returns:
        None. Pipeline progress is reported via ``print`` only.
    """
    # Create tables for databases before any ingestion work starts.
    await create_relational_db_and_tables()
    await create_pgvector_db_and_tables()

    # The flag is stored on the function object itself: setdefault creates it
    # as True on the very first call and returns the stored value afterwards.
    if vars(add).setdefault("first_run", True):
        # Imported lazily so the LLM utilities are only loaded when the
        # one-time configuration check actually runs.
        from cognee.infrastructure.llm.utils import (
            test_embedding_connection,
            test_llm_connection,
        )

        # Test LLM and Embedding configuration once before running Cognee.
        await test_llm_connection()
        await test_embedding_connection()
        add.first_run = False

    acting_user = await get_default_user() if user is None else user

    steps = [
        Task(resolve_data_directories),
        Task(ingest_data, dataset_name, acting_user, node_set),
    ]
    status_stream = run_tasks(
        tasks=steps,
        dataset_id=uuid5(NAMESPACE_OID, dataset_name),
        data=data,
        pipeline_name="add_pipeline",
    )

    async for pipeline_status in status_stream:
        print(f"Pipeline run status: {pipeline_status.pipeline_name} - {pipeline_status.status}")