cognee/cognee/modules/pipelines/operations/run_tasks.py
Boris 0ce6fad24a
feat: pipeline tasks needs mapping (#690)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
2025-04-03 10:52:59 +02:00

136 lines
4.1 KiB
Python

import json
from typing import Any
from uuid import UUID, NAMESPACE_OID, uuid4, uuid5
from cognee.modules.pipelines.operations import (
log_pipeline_run_start,
log_pipeline_run_complete,
log_pipeline_run_error,
)
from cognee.modules.users.methods import get_default_user
from cognee.modules.settings import get_current_settings
from cognee.shared.utils import send_telemetry
from cognee.shared.logging_utils import get_logger
from ..tasks.Task import Task, TaskExecutionCompleted, TaskExecutionErrored, TaskExecutionStarted
from .run_tasks_base import run_tasks_base
# Module-level logger used by run_tasks_with_telemetry for its debug/info/error lines.
# The string is handed to get_logger verbatim as the logger's name.
logger = get_logger("run_tasks(tasks: [Task], data)")
async def run_tasks_with_telemetry(
    tasks: list[Task], data, pipeline_name: str, context: dict = None
):
    """Run ``tasks`` via ``run_tasks_base`` while emitting telemetry and log lines.

    This is an async generator: every item produced by
    ``run_tasks_base(tasks, data, context)`` is re-yielded unchanged, after the
    telemetry event matching its type (started / completed / errored) was sent.

    Args:
        tasks: Tasks forwarded to ``run_tasks_base``.
        data: Input forwarded to ``run_tasks_base``.
        pipeline_name: Identifier written to the log lines and, stringified,
            sent as the ``pipeline_name`` telemetry property.
        context: Optional value forwarded to ``run_tasks_base``.

    Yields:
        The items produced by ``run_tasks_base``, in order.

    Raises:
        Exception: Anything raised after the user lookup is logged, reported
            as "Pipeline Run Errored" and then re-raised unchanged.
    """
    config = get_current_settings()

    logger.debug("\nRunning pipeline with configuration:\n%s\n", json.dumps(config, indent=1))

    user = await get_default_user()

    def report(event_name: str, **properties) -> None:
        # Every event carries the current settings next to its own properties;
        # on a key clash the settings win, as with the literal `{...} | config`.
        send_telemetry(event_name, user.id, additional_properties=properties | config)

    try:
        logger.info("Pipeline run started: `%s`", pipeline_name)
        report("Pipeline Run Started", pipeline_name=str(pipeline_name))

        async for run_task_info in run_tasks_base(tasks, data, context):
            # Independent checks (not elif): the relationship between the three
            # event classes is not visible here, so each is tested on its own.
            if isinstance(run_task_info, TaskExecutionStarted):
                report("Task Run Started", task_name=run_task_info.task.__name__)

            if isinstance(run_task_info, TaskExecutionCompleted):
                report("Task Run Completed", task_name=run_task_info.task.__name__)

            if isinstance(run_task_info, TaskExecutionErrored):
                report(
                    "Task Run Errored",
                    task_name=run_task_info.task.__name__,
                    error=str(run_task_info.error),
                )
                # No exception is being handled at this point, so `exc_info=True`
                # would have nothing to show; hand the reported error over
                # explicitly so its traceback (if it is an exception) is logged.
                logger.error(
                    "Task run errored: `%s`\n%s\n",
                    run_task_info.task.__name__,
                    str(run_task_info.error),
                    exc_info=run_task_info.error,
                )

            yield run_task_info

        logger.info("Pipeline run completed: `%s`", pipeline_name)
        # Sent with the settings attached, like every other event in this function.
        report("Pipeline Run Completed", pipeline_name=str(pipeline_name))
    except Exception as error:
        logger.error(
            "Pipeline run errored: `%s`\n%s\n",
            pipeline_name,
            str(error),
            exc_info=True,
        )
        report("Pipeline Run Errored", pipeline_name=str(pipeline_name))

        # Bare raise re-raises the active exception with its original traceback.
        raise
async def run_tasks(
    tasks: list[Task],
    dataset_id: UUID = None,
    data: Any = None,
    pipeline_name: str = "unknown_pipeline",
    context: dict = None,
):
    """Execute a pipeline and yield its run-log records (async generator).

    Args:
        tasks: Tasks forwarded to ``run_tasks_with_telemetry``.
        dataset_id: Id passed to the run-log helpers; a random ``uuid4`` is
            used when the given value is falsy.
        data: Input forwarded to the tasks and to the run-log helpers.
        pipeline_name: Name passed to the run-log helpers; also the seed of
            the ``uuid5`` pipeline id.
        context: Optional value forwarded to ``run_tasks_with_telemetry``.

    Yields:
        First the result of ``log_pipeline_run_start``; then the result of
        ``log_pipeline_run_complete`` on success, or the result of
        ``log_pipeline_run_error`` on failure.

    Raises:
        Exception: The failure is re-raised after the error record was yielded.
    """
    if not dataset_id:
        dataset_id = uuid4()

    # uuid5 is name-based: the same pipeline_name always maps to the same id.
    pipeline_id = uuid5(NAMESPACE_OID, pipeline_name)

    started_run = await log_pipeline_run_start(pipeline_id, pipeline_name, dataset_id, data)
    yield started_run

    run_id = started_run.pipeline_run_id

    try:
        # NOTE(review): the callee's third parameter is `pipeline_name`, yet the
        # derived id is passed here - confirm this is intentional.
        telemetry_stream = run_tasks_with_telemetry(tasks, data, pipeline_id, context)

        # The per-task items are only drained; nothing from them is forwarded.
        async for _task_info in telemetry_stream:
            continue

        completed_run = await log_pipeline_run_complete(
            run_id, pipeline_id, pipeline_name, dataset_id, data
        )
        yield completed_run
    except Exception as error:
        errored_run = await log_pipeline_run_error(
            run_id, pipeline_id, pipeline_name, dataset_id, data, error
        )
        yield errored_run

        raise error