48 lines
1.8 KiB
Python
48 lines
1.8 KiB
Python
from typing import Union, BinaryIO, List, Optional
|
|
from cognee.modules.users.models import User
|
|
from cognee.modules.users.methods import get_default_user
|
|
from cognee.modules.pipelines import run_tasks, Task
|
|
from cognee.tasks.ingestion import ingest_data, resolve_data_directories
|
|
from cognee.infrastructure.databases.relational import (
|
|
create_db_and_tables as create_relational_db_and_tables,
|
|
)
|
|
from cognee.infrastructure.databases.vector.pgvector import (
|
|
create_db_and_tables as create_pgvector_db_and_tables,
|
|
)
|
|
from uuid import uuid5, NAMESPACE_OID
|
|
|
|
|
|
async def add(
    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
    dataset_name: str = "main_dataset",
    user: User = None,
    node_set: Optional[List[str]] = None,
):
    """Run the "add_pipeline" ingestion pipeline over ``data``.

    Steps, in order:

    1. Create the relational and pgvector database tables.
    2. On the first call only, test the LLM and embedding connections.
       The "still needs testing" flag is stored on the function object as
       ``add.first_run`` and is cleared only after both tests complete, so
       a failing test is retried on the next call.
    3. Fall back to the default user when ``user`` is ``None``.
    4. Run ``resolve_data_directories`` followed by ``ingest_data`` as
       pipeline tasks and print every status the pipeline yields.

    Args:
        data: A binary stream, a string, or a list of either, handed to the
            pipeline unchanged.
        dataset_name: Name of the target dataset; the dataset id is the
            UUIDv5 of this name under ``NAMESPACE_OID``.
        user: User passed to the ingestion task; the default user is looked
            up when omitted.
        node_set: Optional list of strings forwarded to the ingestion task.
    """
    # Create tables for databases before any ingestion work starts.
    await create_relational_db_and_tables()
    await create_pgvector_db_and_tables()

    # Keep the current flag if it exists, otherwise start it out as True.
    add.first_run = getattr(add, "first_run", True)

    if add.first_run:
        # Imported here so the helpers are only loaded when the checks run.
        from cognee.infrastructure.llm.utils import (
            test_embedding_connection,
            test_llm_connection,
        )

        # Test LLM and Embedding configuration once before running Cognee.
        await test_llm_connection()
        await test_embedding_connection()
        add.first_run = False

    if user is None:
        user = await get_default_user()

    ingestion_tasks = [
        Task(resolve_data_directories),
        Task(ingest_data, dataset_name, user, node_set),
    ]

    status_stream = run_tasks(
        tasks=ingestion_tasks,
        dataset_id=uuid5(NAMESPACE_OID, dataset_name),
        data=data,
        pipeline_name="add_pipeline",
    )

    async for run_status in status_stream:
        print(f"Pipeline run status: {run_status.pipeline_name} - {run_status.status}")
|