feat: add telemetry logging to pipelines and tasks (#140)
* feat: add telemetry logging to pipelines and tasks
* fix: enable telemetry for local environment
parent 41bf8617f1
commit 56868d8a6f
8 changed files with 118 additions and 40 deletions
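
At a glance: send_telemetry gains a required user_id and an optional additional_properties dict, and pipelines, tasks, and searches are now bracketed by STARTED/COMPLETED/ERRORED events. A minimal sketch of the resulting call pattern, assuming get_default_user() resolves a user the same way the task runner below does:

    import asyncio

    from cognee.shared.utils import send_telemetry
    from cognee.modules.users.methods import get_default_user

    async def main():
        user = await get_default_user()

        send_telemetry("cognee.cognify EXECUTION STARTED", user.id)
        try:
            # ... pipeline work would happen here ...
            send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id)
        except Exception:
            send_telemetry("cognee.cognify EXECUTION ERRORED", user.id)
            raise

    asyncio.run(main())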
@@ -65,7 +65,7 @@ export default function DatasetsView({
             <StatusIcon status={dataset.status} />
             <DropdownMenu>
               <Stack gap="1" className={styles.datasetMenu} orientation="vertical">
-                {dataset.status === 'DATASET_PROCESSING_FINISHED' ? (
+                {dataset.status === 'DATASET_PROCESSING_COMPLETED' ? (
                   <CTAButton
                     onClick={(event: React.MouseEvent<HTMLButtonElement>) => handleExploreDataset(event, dataset)}
                   >
@@ -1,5 +1,5 @@
-export default function StatusIcon({ status }: { status: 'DATASET_PROCESSING_FINISHED' | string }) {
-  const isSuccess = status === 'DATASET_PROCESSING_FINISHED';
+export default function StatusIcon({ status }: { status: 'DATASET_PROCESSING_COMPLETED' | string }) {
+  const isSuccess = status === 'DATASET_PROCESSING_COMPLETED';
 
   return (
     <div
@@ -152,12 +152,13 @@ async def add_files(file_paths: List[str], dataset_name: str, user: User = None)
         await give_permission_on_document(user, data_id, "write")
 
+    send_telemetry("cognee.add EXECUTION STARTED", user_id = user.id)
     run_info = pipeline.run(
         data_resources(processed_file_paths, user),
         table_name = "file_metadata",
        dataset_name = dataset_name,
         write_disposition = "merge",
     )
-    send_telemetry("cognee.add")
+    send_telemetry("cognee.add EXECUTION COMPLETED", user_id = user.id)
 
     return run_info
@@ -2,6 +2,7 @@ import asyncio
 import logging
 from typing import Union
 
+from cognee.shared.utils import send_telemetry
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.data.models import Dataset, Data
@@ -69,6 +70,8 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):
     dataset_id = dataset.id
     dataset_name = generate_dataset_name(dataset.name)
 
+    send_telemetry("cognee.cognify EXECUTION STARTED", user.id)
+
     async with update_status_lock:
         task_status = await get_pipeline_status([dataset_id])
@@ -110,17 +113,21 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):
             Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
         ]
 
-        pipeline = run_tasks(tasks, data_documents)
+        pipeline = run_tasks(tasks, data_documents, "cognify_pipeline")
 
         async for result in pipeline:
             print(result)
 
-        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_FINISHED", {
+        send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id)
+
+        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_COMPLETED", {
             "dataset_name": dataset_name,
             "files": document_ids_str,
         })
     except Exception as error:
-        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERROR", {
+        send_telemetry("cognee.cognify EXECUTION ERRORED", user.id)
+
+        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERRORED", {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
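
Read together with the task-runner changes further down, a successful cognify run now emits roughly this event sequence (a reconstruction from this commit's hunks; the per-task pair repeats for each task in the pipeline):

    cognee.cognify EXECUTION STARTED
    Pipeline Run Started          {"pipeline_name": "cognify_pipeline"}
    <type> Task Started           {"task_name": ...}
    <type> Task Completed         {"task_name": ...}
    Pipeline Run Completed        {"pipeline_name": "cognify_pipeline"}
    cognee.cognify EXECUTION COMPLETED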
@@ -51,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any], user: User = None) ->
 
     own_document_ids = await get_document_ids_for_user(user.id)
     search_params = SearchParameters(search_type = search_type, params = params)
-    search_results = await specific_search([search_params])
+    search_results = await specific_search([search_params], user)
 
     from uuid import UUID
@@ -67,7 +67,7 @@ async def search(search_type: str, params: Dict[str, Any], user: User = None) ->
     return filtered_search_results
 
 
-async def specific_search(query_params: List[SearchParameters]) -> List:
+async def specific_search(query_params: List[SearchParameters], user) -> List:
     search_functions: Dict[SearchType, Callable] = {
         SearchType.ADJACENT: search_adjacent,
         SearchType.SUMMARY: search_summary,
@@ -78,6 +78,8 @@ async def specific_search(query_params: List[SearchParameters]) -> List:
 
     search_tasks = []
 
+    send_telemetry("cognee.search EXECUTION STARTED", user.id)
+
     for search_param in query_params:
         search_func = search_functions.get(search_param.search_type)
         if search_func:
@@ -88,6 +90,6 @@ async def specific_search(query_params: List[SearchParameters]) -> List:
     # Use asyncio.gather to run all scheduled tasks concurrently
     search_results = await asyncio.gather(*search_tasks)
 
-    send_telemetry("cognee.search")
+    send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
 
     return search_results[0] if len(search_results) == 1 else search_results
@@ -1,10 +1,13 @@
 import inspect
 import logging
+from cognee.shared.utils import send_telemetry
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_default_user
 from ..tasks.Task import Task
 
 logger = logging.getLogger("run_tasks(tasks: [Task], data)")
 
-async def run_tasks(tasks: [Task], data = None):
+async def run_tasks_base(tasks: [Task], data = None, user: User = None):
     if len(tasks) == 0:
         yield data
         return
@@ -17,7 +20,10 @@ async def run_tasks(tasks: [Task], data = None):
     next_task_batch_size = next_task.task_config["batch_size"] if next_task else 1
 
     if inspect.isasyncgenfunction(running_task.executable):
-        logger.info("Running async generator task: `%s`", running_task.executable.__name__)
+        logger.info("Async generator task started: `%s`", running_task.executable.__name__)
+        send_telemetry("Async Generator Task Started", user.id, {
+            "task_name": running_task.executable.__name__,
+        })
         try:
             results = []
@@ -27,29 +33,42 @@ async def run_tasks(tasks: [Task], data = None):
                 results.append(partial_result)
 
                 if len(results) == next_task_batch_size:
-                    async for result in run_tasks(leftover_tasks, results[0] if next_task_batch_size == 1 else results):
+                    async for result in run_tasks_base(
+                        leftover_tasks,
+                        results[0] if next_task_batch_size == 1 else results,
+                        user = user,
+                    ):
                         yield result
 
                     results = []
 
             if len(results) > 0:
-                async for result in run_tasks(leftover_tasks, results):
+                async for result in run_tasks_base(leftover_tasks, results, user):
                     yield result
 
                 results = []
 
-            logger.info("Finished async generator task: `%s`", running_task.executable.__name__)
+            logger.info("Async generator task completed: `%s`", running_task.executable.__name__)
+            send_telemetry("Async Generator Task Completed", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
         except Exception as error:
             logger.error(
-                "Error occurred while running async generator task: `%s`\n%s\n",
+                "Async generator task errored: `%s`\n%s\n",
                 running_task.executable.__name__,
                 str(error),
                 exc_info = True,
             )
+            send_telemetry("Async Generator Task Errored", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
             raise error
 
     elif inspect.isgeneratorfunction(running_task.executable):
-        logger.info("Running generator task: `%s`", running_task.executable.__name__)
+        logger.info("Generator task started: `%s`", running_task.executable.__name__)
+        send_telemetry("Generator Task Started", user.id, {
+            "task_name": running_task.executable.__name__,
+        })
         try:
             results = []
@@ -57,59 +76,112 @@ async def run_tasks(tasks: [Task], data = None):
                 results.append(partial_result)
 
                 if len(results) == next_task_batch_size:
-                    async for result in run_tasks(leftover_tasks, results[0] if next_task_batch_size == 1 else results):
+                    async for result in run_tasks_base(leftover_tasks, results[0] if next_task_batch_size == 1 else results, user):
                         yield result
 
                     results = []
 
             if len(results) > 0:
-                async for result in run_tasks(leftover_tasks, results):
+                async for result in run_tasks_base(leftover_tasks, results, user):
                     yield result
 
                 results = []
 
-            logger.info("Finished generator task: `%s`", running_task.executable.__name__)
+            logger.info("Generator task completed: `%s`", running_task.executable.__name__)
+            send_telemetry("Generator Task Completed", user_id = user.id, additional_properties = {
+                "task_name": running_task.executable.__name__,
+            })
         except Exception as error:
             logger.error(
-                "Error occurred while running generator task: `%s`\n%s\n",
+                "Generator task errored: `%s`\n%s\n",
                 running_task.executable.__name__,
                 str(error),
                 exc_info = True,
             )
+            send_telemetry("Generator Task Errored", user_id = user.id, additional_properties = {
+                "task_name": running_task.executable.__name__,
+            })
             raise error
 
     elif inspect.iscoroutinefunction(running_task.executable):
-        logger.info("Running coroutine task: `%s`", running_task.executable.__name__)
+        logger.info("Coroutine task started: `%s`", running_task.executable.__name__)
+        send_telemetry("Coroutine Task Started", user_id = user.id, additional_properties = {
+            "task_name": running_task.executable.__name__,
+        })
         try:
             task_result = await running_task.run(*args)
 
-            async for result in run_tasks(leftover_tasks, task_result):
+            async for result in run_tasks_base(leftover_tasks, task_result, user):
                 yield result
 
-            logger.info("Finished coroutine task: `%s`", running_task.executable.__name__)
+            logger.info("Coroutine task completed: `%s`", running_task.executable.__name__)
+            send_telemetry("Coroutine Task Completed", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
         except Exception as error:
             logger.error(
-                "Error occurred while running coroutine task: `%s`\n%s\n",
+                "Coroutine task errored: `%s`\n%s\n",
                 running_task.executable.__name__,
                 str(error),
                 exc_info = True,
             )
+            send_telemetry("Coroutine Task Errored", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
             raise error
 
     elif inspect.isfunction(running_task.executable):
-        logger.info("Running function task: `%s`", running_task.executable.__name__)
+        logger.info("Function task started: `%s`", running_task.executable.__name__)
+        send_telemetry("Function Task Started", user.id, {
+            "task_name": running_task.executable.__name__,
+        })
         try:
             task_result = running_task.run(*args)
 
-            async for result in run_tasks(leftover_tasks, task_result):
+            async for result in run_tasks_base(leftover_tasks, task_result, user):
                 yield result
 
-            logger.info("Finished function task: `%s`", running_task.executable.__name__)
+            logger.info("Function task completed: `%s`", running_task.executable.__name__)
+            send_telemetry("Function Task Completed", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
         except Exception as error:
             logger.error(
-                "Error occurred while running function task: `%s`\n%s\n",
+                "Function task errored: `%s`\n%s\n",
                 running_task.executable.__name__,
                 str(error),
                 exc_info = True,
             )
+            send_telemetry("Function Task Errored", user.id, {
+                "task_name": running_task.executable.__name__,
+            })
             raise error
 
+async def run_tasks(tasks: [Task], data = None, pipeline_name: str = "default_pipeline"):
+    user = await get_default_user()
+
+    try:
+        logger.info("Pipeline run started: `%s`", pipeline_name)
+        send_telemetry("Pipeline Run Started", user.id, {
+            "pipeline_name": pipeline_name,
+        })
+
+        async for result in run_tasks_base(tasks, data, user):
+            yield result
+
+        logger.info("Pipeline run completed: `%s`", pipeline_name)
+        send_telemetry("Pipeline Run Completed", user.id, {
+            "pipeline_name": pipeline_name,
+        })
+    except Exception as error:
+        logger.error(
+            "Pipeline run errored: `%s`\n%s\n",
+            pipeline_name,
+            str(error),
+            exc_info = True,
+        )
+        send_telemetry("Pipeline Run Errored", user.id, {
+            "pipeline_name": pipeline_name,
+        })
+
+        raise error
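
The renamed run_tasks_base keeps the recursive batching logic, while the new run_tasks wrapper resolves a default user and emits Pipeline Run Started/Completed/Errored around the whole run. A usage sketch (the import paths and the double task are assumptions for illustration; Task, run_tasks, and the pipeline_name parameter come straight from this diff):

    from cognee.modules.pipelines.operations.run_tasks import run_tasks
    from cognee.modules.pipelines.tasks.Task import Task

    def double(num: int):
        # A plain function task: dispatched by the inspect.isfunction branch above.
        return num * 2

    async def consume():
        # Third positional argument is the new pipeline_name, as in the cognify hunk.
        pipeline = run_tasks([Task(double)], 21, "example_pipeline")
        async for result in pipeline:
            print(result)  # 42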
@@ -1,7 +1,5 @@
 """ This module contains utility functions for the cognee. """
 import logging
 import os
 import uuid
 import datetime
-import graphistry
-import networkx as nx
@@ -14,25 +12,23 @@ from posthog import Posthog
 from cognee.base_config import get_base_config
 from cognee.infrastructure.databases.graph import get_graph_engine
 
-def send_telemetry(event_name: str):
+def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     if os.getenv("TELEMETRY_DISABLED"):
-        print("Telemetry is disabled.")
+        logging.info("Telemetry is disabled.")
         return
 
     env = os.getenv("ENV")
-    if env in ["local", "test", "dev"]:
+    if env in ["test", "dev"]:
         return
 
     posthog = Posthog(
-        project_api_key = "phc_bbR86N876kwub62Lr3dhQ7zIeRyMMMm0fxXqxPqzLm3",
+        project_api_key = "phc_UB1YVere1KtJg1MFxAo6ABfpkwN3OxCvGNDkMTjvH0",
         host = "https://eu.i.posthog.com"
     )
 
-    user_id = str(uuid.uuid4())
     current_time = datetime.datetime.now()
     properties = {
-        "time": current_time.strftime("%m/%d/%Y")
+        "time": current_time.strftime("%m/%d/%Y"),
+        **additional_properties,
     }
 
     try:
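
One detail worth flagging in the new signature: additional_properties: dict = {} is a mutable default, which Python shares across calls. The function as written only reads from it, so this is harmless today, but the conventional defensive spelling would be (a sketch, not part of this commit):

    import datetime

    def send_telemetry(event_name: str, user_id, additional_properties: dict = None):
        # A fresh dict per call sidesteps the shared-mutable-default pitfall.
        additional_properties = additional_properties or {}
        properties = {
            "time": datetime.datetime.now().strftime("%m/%d/%Y"),
            **additional_properties,
        }
        # ... then capture(user_id, event_name, properties) via Posthog as above.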
@@ -5,7 +5,7 @@ echo "Environment: $ENVIRONMENT"
 
 echo "Starting Gunicorn"
 
-if [ "$ENVIRONMENT" = "local" ]; then
+if [ "$ENVIRONMENT" = "dev" ]; then
   if [ "$DEBUG" = true ]; then
     echo "Waiting for the debugger to attach..."