Feat: log pipeline status and pass it through pipeline [COG-1214] (#501)
## Description

Log pipeline run status and pass it through the pipeline: `run_tasks` now records start, completion, and error events as `PipelineRun` rows and yields them to callers.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

## Summary by CodeRabbit

- **New Features**
  - Pipeline execution now provides consolidated status feedback, with telemetry for start, completion, and error events.
  - Dataset identifiers are generated automatically, giving clearer task and pipeline-run associations.
- **Refactor**
  - Task execution is streamlined with explicit parameter handling for more structured pipeline processing.
  - Interactive examples and demos now return results directly, making integration and monitoring more accessible.

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
parent 6a0c0e3ef8 · commit 05ba29af01
17 changed files with 195 additions and 94 deletions
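At a glance: `run_tasks` becomes an async generator that takes an explicit `dataset_id` and yields `PipelineRun` status records (a start record, then a completion or error record) instead of raw task results. A minimal consumption sketch, with a hypothetical pass-through task and dataset name:

```python
from uuid import uuid5, NAMESPACE_OID

from cognee.modules.pipelines import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def demo():
    async def passthrough(data):  # hypothetical task for illustration
        yield data

    dataset_id = uuid5(NAMESPACE_OID, "demo_dataset")  # deterministic per name

    pipeline_run_status = None
    async for run_status in run_tasks(
        [Task(passthrough)], dataset_id, "payload", "demo_pipeline"
    ):
        pipeline_run_status = run_status  # start record first, then completed/errored

    return pipeline_run_status
```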
**`.pre-commit-config.yaml`**

```diff
@@ -10,7 +10,7 @@ repos:
       - id: check-added-large-files
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.9.0
+    rev: v0.9.5
     hooks:
       # Run the linter.
       - id: ruff
```
**cognee `add()` entry point**

```diff
@@ -9,6 +9,7 @@ from cognee.infrastructure.databases.relational import (
 from cognee.infrastructure.databases.vector.pgvector import (
     create_db_and_tables as create_pgvector_db_and_tables,
 )
+from uuid import uuid5, NAMESPACE_OID


 async def add(
@@ -37,7 +38,10 @@ async def add(
     tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user)]

-    pipeline = run_tasks(tasks, data, "add_pipeline")
+    dataset_id = uuid5(NAMESPACE_OID, dataset_name)
+    pipeline = run_tasks(
+        tasks=tasks, dataset_id=dataset_id, data=data, pipeline_name="add_pipeline"
+    )

     async for result in pipeline:
         print(result)

```
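Note that `uuid5(NAMESPACE_OID, dataset_name)` is deterministic, so the same dataset name always maps to the same `dataset_id` and therefore to the same pipeline-run history:

```python
from uuid import uuid5, NAMESPACE_OID

# Re-adding to the same dataset name always targets the same stable UUID.
assert uuid5(NAMESPACE_OID, "main_dataset") == uuid5(NAMESPACE_OID, "main_dataset")
```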
**code graph pipeline (`run_code_graph_pipeline`)**

```diff
@@ -69,9 +69,19 @@ async def run_code_graph_pipeline(repo_path, include_docs=True):
         ),
     ]

+    pipeline_run_status = None
     if include_docs:
-        async for result in run_tasks(non_code_tasks, repo_path):
-            yield result
+        non_code_pipeline_run = run_tasks(non_code_tasks, None, repo_path, "cognify_pipeline")
+        async for run_status in non_code_pipeline_run:
+            pipeline_run_status = run_status

-    async for result in run_tasks(tasks, repo_path, "cognify_code_pipeline"):
-        yield result
+    from cognee.modules.data.methods import get_datasets
+
+    existing_datasets = await get_datasets(user.id)
+    code_pipeline_run = run_tasks(
+        tasks, existing_datasets[0].id, repo_path, "cognify_code_pipeline"
+    )
+    async for run_status in code_pipeline_run:
+        pipeline_run_status = run_status
+
+    return pipeline_run_status
```
**cognify pipeline (`run_cognify_pipeline`)**

```diff
@@ -12,7 +12,6 @@ from cognee.modules.data.models import Data, Dataset
 from cognee.modules.pipelines import run_tasks
 from cognee.modules.pipelines.models import PipelineRunStatus
 from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
-from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status
 from cognee.modules.pipelines.tasks.Task import Task
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
@@ -71,8 +70,6 @@ async def cognify(
 async def run_cognify_pipeline(dataset: Dataset, user: User, tasks: list[Task]):
     data_documents: list[Data] = await get_dataset_data(dataset_id=dataset.id)

-    document_ids_str = [str(document.id) for document in data_documents]
-
     dataset_id = dataset.id
     dataset_name = generate_dataset_name(dataset.name)

@@ -82,21 +79,12 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, tasks: list[Task]):
     task_status = await get_pipeline_status([dataset_id])

     if (
-        dataset_id in task_status
-        and task_status[dataset_id] == PipelineRunStatus.DATASET_PROCESSING_STARTED
+        str(dataset_id) in task_status
+        and task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED
     ):
         logger.info("Dataset %s is already being processed.", dataset_name)
         return

-    await log_pipeline_status(
-        dataset_id,
-        PipelineRunStatus.DATASET_PROCESSING_STARTED,
-        {
-            "dataset_name": dataset_name,
-            "files": document_ids_str,
-        },
-    )
-
     try:
         if not isinstance(tasks, list):
             raise ValueError("Tasks must be a list")
@@ -105,32 +93,17 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, tasks: list[Task]):
             if not isinstance(task, Task):
                 raise ValueError(f"Task {task} is not an instance of Task")

-        pipeline = run_tasks(tasks, data_documents, "cognify_pipeline")
+        pipeline_run = run_tasks(tasks, dataset.id, data_documents, "cognify_pipeline")
+        pipeline_run_status = None

-        async for result in pipeline:
-            print(result)
+        async for run_status in pipeline_run:
+            pipeline_run_status = run_status

         send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id)
+        return pipeline_run_status

-        await log_pipeline_status(
-            dataset_id,
-            PipelineRunStatus.DATASET_PROCESSING_COMPLETED,
-            {
-                "dataset_name": dataset_name,
-                "files": document_ids_str,
-            },
-        )
-
     except Exception as error:
         send_telemetry("cognee.cognify EXECUTION ERRORED", user.id)

-        await log_pipeline_status(
-            dataset_id,
-            PipelineRunStatus.DATASET_PROCESSING_ERRORED,
-            {
-                "dataset_name": dataset_name,
-                "files": document_ids_str,
-            },
-        )
         raise error

```
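Since `get_pipeline_status` now keys its result by `str(dataset_id)`, the in-flight guard has to match on string keys as well. A condensed sketch of the equivalent check inside `run_cognify_pipeline` (using `dict.get` for brevity; not the literal committed code):

```python
task_status = await get_pipeline_status([dataset_id])  # keys are str(UUID)

if task_status.get(str(dataset_id)) == PipelineRunStatus.DATASET_PROCESSING_STARTED:
    logger.info("Dataset %s is already being processed.", dataset_name)
    return  # dataset already in flight; skip re-processing
```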
**`cognee/modules/pipelines/models/PipelineRun.py`**

```diff
@@ -1,7 +1,7 @@
 import enum
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, DateTime, JSON, Enum, UUID
+from sqlalchemy import Column, DateTime, JSON, Enum, UUID, String
 from cognee.infrastructure.databases.relational import Base

@@ -19,6 +19,7 @@ class PipelineRun(Base):
     created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))

     status = Column(Enum(PipelineRunStatus))
-    run_id = Column(UUID, index=True)
+    pipeline_run_id = Column(UUID, index=True)
+    pipeline_id = Column(UUID, index=True)
+    dataset_id = Column(UUID, index=True)
     run_info = Column(JSON)
```
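The model now distinguishes three identifiers: `pipeline_run_id` groups the start/complete/error rows of a single execution, `pipeline_id` names the pipeline (a `uuid5` of its name, per the `run_tasks` change below), and `dataset_id` ties the run to a dataset. A hedged query sketch (hypothetical helper, not part of this PR) for fetching one run's full history:

```python
from sqlalchemy import select

from cognee.modules.pipelines.models import PipelineRun


def run_history_query(pipeline_run_id):
    # All status rows written for one execution, oldest first.
    return (
        select(PipelineRun)
        .where(PipelineRun.pipeline_run_id == pipeline_run_id)
        .order_by(PipelineRun.created_at.asc())
    )
```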
**`cognee/modules/pipelines/operations/__init__.py`** (new file)

```diff
@@ -0,0 +1,3 @@
+from .log_pipeline_run_start import log_pipeline_run_start
+from .log_pipeline_run_complete import log_pipeline_run_complete
+from .log_pipeline_run_error import log_pipeline_run_error
```
**`cognee/modules/pipelines/operations/get_pipeline_status.py`**

```diff
@@ -1,11 +1,11 @@
 from uuid import UUID
-from sqlalchemy import func, select
-from sqlalchemy.orm import aliased
+from sqlalchemy import select, func
 from cognee.infrastructure.databases.relational import get_relational_engine
 from ..models import PipelineRun
+from sqlalchemy.orm import aliased


-async def get_pipeline_status(pipeline_ids: list[UUID]):
+async def get_pipeline_status(dataset_ids: list[UUID]):
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
@@ -14,12 +14,12 @@ async def get_pipeline_status(pipeline_ids: list[UUID]):
                 PipelineRun,
                 func.row_number()
                 .over(
-                    partition_by=PipelineRun.run_id,
+                    partition_by=PipelineRun.dataset_id,
                     order_by=PipelineRun.created_at.desc(),
                 )
                 .label("rn"),
             )
-            .filter(PipelineRun.run_id.in_(pipeline_ids))
+            .filter(PipelineRun.dataset_id.in_(dataset_ids))
             .subquery()
         )

@@ -29,16 +29,6 @@ async def get_pipeline_status(pipeline_ids: list[UUID]):

         runs = (await session.execute(latest_runs)).scalars().all()

-        pipeline_statuses = {str(run.run_id): run.status for run in runs}
+        pipeline_statuses = {str(run.dataset_id): run.status for run in runs}

         return pipeline_statuses
-
-    # f"""SELECT data_id, status
-    # FROM (
-    #     SELECT data_id, status, ROW_NUMBER() OVER (PARTITION BY data_id ORDER BY created_at DESC) as rn
-    #     FROM cognee.cognee.task_runs
-    #     WHERE data_id IN ({formatted_data_ids})
-    # ) t
-    # WHERE rn = 1;"""
-
-    # return { dataset["data_id"]: dataset["status"] for dataset in datasets_statuses }
```
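The window-function query keeps only the newest `PipelineRun` per dataset: `row_number()` partitions rows by `dataset_id`, orders them newest-first, and the outer query keeps `rn == 1`. An in-memory sketch of the same semantics, for clarity:

```python
def latest_status_per_dataset(runs):
    """runs: iterable of objects with .dataset_id, .created_at, .status."""
    latest = {}
    for run in runs:
        key = str(run.dataset_id)
        # Keep only the most recently created run for each dataset.
        if key not in latest or run.created_at > latest[key].created_at:
            latest[key] = run
    return {key: run.status for key, run in latest.items()}
```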
**`cognee/modules/pipelines/operations/log_pipeline_run_complete.py`** (new file)

```diff
@@ -0,0 +1,34 @@
+from uuid import UUID, uuid4
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.models import Data
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+from typing import Any
+
+
+async def log_pipeline_run_complete(
+    pipeline_run_id: UUID, pipeline_id: str, dataset_id: UUID, data: Any
+):
+    if not data:
+        data_info = "None"
+    elif isinstance(data, list) and all(isinstance(item, Data) for item in data):
+        data_info = [str(item.id) for item in data]
+    else:
+        data_info = str(data)
+
+    pipeline_run = PipelineRun(
+        pipeline_run_id=pipeline_run_id,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_COMPLETED,
+        dataset_id=dataset_id,
+        run_info={
+            "data": data_info,
+        },
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
```
**`cognee/modules/pipelines/operations/log_pipeline_run_error.py`** (new file)

```diff
@@ -0,0 +1,35 @@
+from uuid import UUID, uuid4
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.models import Data
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+from typing import Any
+
+
+async def log_pipeline_run_error(
+    pipeline_run_id: UUID, pipeline_id: str, dataset_id: UUID, data: Any, e: Exception
+):
+    if not data:
+        data_info = "None"
+    elif isinstance(data, list) and all(isinstance(item, Data) for item in data):
+        data_info = [str(item.id) for item in data]
+    else:
+        data_info = str(data)
+
+    pipeline_run = PipelineRun(
+        pipeline_run_id=pipeline_run_id,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_ERRORED,
+        dataset_id=dataset_id,
+        run_info={
+            "data": data_info,
+            "error": str(e),
+        },
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
```
**`cognee/modules/pipelines/operations/log_pipeline_run_start.py`** (new file)

```diff
@@ -0,0 +1,34 @@
+from uuid import UUID, uuid4
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.models import Data
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+from typing import Any
+
+
+async def log_pipeline_run_start(pipeline_id: str, dataset_id: UUID, data: Any):
+    if not data:
+        data_info = "None"
+    elif isinstance(data, list) and all(isinstance(item, Data) for item in data):
+        data_info = [str(item.id) for item in data]
+    else:
+        data_info = str(data)
+
+    pipeline_run_id = uuid4()
+
+    pipeline_run = PipelineRun(
+        pipeline_run_id=pipeline_run_id,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_STARTED,
+        dataset_id=dataset_id,
+        run_info={
+            "data": data_info,
+        },
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
```
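All three `log_pipeline_run_*` helpers normalize `data` the same way before persisting it in `run_info`. The shared shape, as a standalone sketch (a hypothetical `summarize_data` helper; the PR inlines this logic in each file):

```python
from typing import Any

from cognee.modules.data.models import Data


def summarize_data(data: Any):
    # Mirrors the normalization used by the log_pipeline_run_* helpers:
    # empty -> "None", list[Data] -> list of string IDs, anything else -> str().
    if not data:
        return "None"
    if isinstance(data, list) and all(isinstance(item, Data) for item in data):
        return [str(item.id) for item in data]
    return str(data)
```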
**`cognee/modules/pipelines/operations/log_pipeline_status.py`** (deleted)

```diff
@@ -1,18 +0,0 @@
-from uuid import UUID
-from cognee.infrastructure.databases.relational import get_relational_engine
-from ..models.PipelineRun import PipelineRun
-
-
-async def log_pipeline_status(run_id: UUID, status: str, run_info: dict):
-    db_engine = get_relational_engine()
-
-    async with db_engine.get_async_session() as session:
-        session.add(
-            PipelineRun(
-                run_id=run_id,
-                status=status,
-                run_info=run_info,
-            )
-        )
-
-        await session.commit()
```
**`cognee/modules/pipelines/operations/run_tasks.py`**

```diff
@@ -1,11 +1,19 @@
 import inspect
 import json
 import logging
+from uuid import UUID
+from typing import Any
+from cognee.modules.pipelines.operations import (
+    log_pipeline_run_start,
+    log_pipeline_run_complete,
+    log_pipeline_run_error,
+)
 from cognee.modules.settings import get_current_settings
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
 from cognee.shared.utils import send_telemetry
+from uuid import uuid5, NAMESPACE_OID

 from ..tasks.Task import Task

@@ -261,6 +269,20 @@ async def run_tasks_with_telemetry(tasks: list[Task], data, pipeline_name: str):
     raise error


-async def run_tasks(tasks: list[Task], data=None, pipeline_name: str = "default_pipeline"):
-    async for result in run_tasks_with_telemetry(tasks, data, pipeline_name):
-        yield result
+async def run_tasks(tasks: list[Task], dataset_id: UUID, data: Any, pipeline_name: str):
+    pipeline_id = uuid5(NAMESPACE_OID, pipeline_name)
+
+    pipeline_run = await log_pipeline_run_start(pipeline_id, dataset_id, data)
+
+    yield pipeline_run
+    pipeline_run_id = pipeline_run.pipeline_run_id
+
+    try:
+        async for _ in run_tasks_with_telemetry(tasks, data, pipeline_id):
+            pass
+
+        yield await log_pipeline_run_complete(pipeline_run_id, pipeline_id, dataset_id, data)
+
+    except Exception as e:
+        yield await log_pipeline_run_error(pipeline_run_id, pipeline_id, dataset_id, data, e)
+        raise e
```
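With this generator shape, a caller sees the start record first and then either the completion record or the error record; in the error case the record is yielded before the exception propagates out of the loop. A hedged consumption sketch (hypothetical `consume` helper):

```python
async def consume(pipeline):
    runs = []
    try:
        async for pipeline_run in pipeline:
            runs.append(pipeline_run)  # [started, ..., completed] on success
    except Exception:
        # The error PipelineRun is yielded before the exception re-raises,
        # so runs[-1].status is DATASET_PROCESSING_ERRORED here.
        print(runs[-1].status)
        raise
    return runs[-1]  # final status record
```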
**test: `test_run_tasks_from_queue`**

```diff
@@ -1,8 +1,9 @@
 import asyncio
 from queue import Queue

-from cognee.modules.pipelines.operations.run_tasks import run_tasks
+from cognee.modules.pipelines.operations.run_tasks import run_tasks_base
 from cognee.modules.pipelines.tasks.Task import Task
+from cognee.modules.users.methods import get_default_user


 async def pipeline(data_queue):
@@ -19,13 +20,15 @@ async def pipeline(data_queue):
     async def multiply_by_two(num):
         yield num * 2

-    tasks_run = run_tasks(
+    user = await get_default_user()
+    tasks_run = run_tasks_base(
         [
             Task(queue_consumer),
             Task(add_one),
             Task(multiply_by_two),
         ],
-        pipeline_name="test_run_tasks_from_queue",
+        data=None,
+        user=user,
     )

     results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
@@ -50,3 +53,7 @@ async def run_queue():

 def test_run_tasks_from_queue():
     asyncio.run(run_queue())
+
+
+if __name__ == "__main__":
+    asyncio.run(run_queue())
```
**test: `test_run_tasks`**

```diff
@@ -1,7 +1,8 @@
 import asyncio

-from cognee.modules.pipelines.operations.run_tasks import run_tasks
+from cognee.modules.pipelines.operations.run_tasks import run_tasks_base
 from cognee.modules.pipelines.tasks.Task import Task
+from cognee.modules.users.methods import get_default_user


 async def run_and_check_tasks():
@@ -19,15 +20,16 @@ async def run_and_check_tasks():
     async def add_one_single(num):
         yield num + 1

-    pipeline = run_tasks(
+    user = await get_default_user()
+    pipeline = run_tasks_base(
         [
             Task(number_generator),
             Task(add_one, task_config={"batch_size": 5}),
             Task(multiply_by_two, task_config={"batch_size": 1}),
             Task(add_one_single),
         ],
-        10,
-        pipeline_name="test_run_tasks",
+        data=10,
+        user=user,
     )

     results = [5, 7, 9, 11, 13, 15, 17, 19, 21, 23]
```
**code graph example (`main`)**

```diff
@@ -7,8 +7,7 @@ from cognee.shared.utils import setup_logging


 async def main(repo_path, include_docs):
-    async for result in run_code_graph_pipeline(repo_path, include_docs):
-        print(result)
+    return await run_code_graph_pipeline(repo_path, include_docs)


 def parse_args():
```
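Since `run_code_graph_pipeline` now returns the final run status instead of yielding intermediate results, `main` is a plain coroutine and callers get the status directly; for instance (argument values hypothetical, normally supplied by `parse_args()`):

```python
import asyncio

# Hypothetical invocation: the returned value is the last PipelineRun status record.
status = asyncio.run(main("/path/to/repo", include_docs=True))
print(status)
```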
**notebook: code graph pipeline**

```diff
@@ -93,11 +93,12 @@
    "outputs": [],
    "source": [
     "from cognee.modules.pipelines import run_tasks\n",
+    "from uuid import uuid5, NAMESPACE_OID\n",
     "\n",
     "notebook_path = os.path.abspath(\"\")\n",
     "repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
     "\n",
-    "pipeline = run_tasks(tasks, repo_clone_location, \"code_graph_pipeline\")\n",
+    "pipeline = run_tasks(tasks, uuid5(NAMESPACE_OID, repo_clone_location), repo_clone_location, \"code_graph_pipeline\")\n",
     "\n",
     "async for result in pipeline:\n",
     "    print(result)"
@@ -117,7 +118,9 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": "# Let's check the evaluations"
+   "source": [
+    "# Let's check the evaluations"
+   ]
   },
   {
    "cell_type": "code",
```
**notebook: cognify**

```diff
@@ -674,10 +674,12 @@
     "        Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
     "    ]\n",
     "\n",
-    "    pipeline = run_tasks(tasks, data_documents)\n",
+    "    pipeline_run = run_tasks(tasks, dataset.id, data_documents, \"cognify_pipeline\")\n",
+    "    pipeline_run_status = None\n",
+    "\n",
+    "    async for run_status in pipeline_run:\n",
+    "        pipeline_run_status = run_status\n",
     "\n",
-    "    async for result in pipeline:\n",
-    "        print(result)\n",
     "    except Exception as error:\n",
     "        raise error\n"
    ]
```