cognee/notebooks/cognee_code_graph_demo.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"import cognee\n",
"from cognee.infrastructure.databases.relational import create_db_and_tables\n",
"\n",
"notebook_path = os.path.abspath(\"\")\n",
"data_directory_path = str(\n",
" pathlib.Path(os.path.join(notebook_path, \".data_storage/code_graph\")).resolve()\n",
")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee_directory_path = str(\n",
" pathlib.Path(os.path.join(notebook_path, \".cognee_system/code_graph\")).resolve()\n",
")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)\n",
"\n",
"await create_db_and_tables()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from os import path\n",
"from pathlib import Path\n",
"from cognee.infrastructure.files.storage import LocalStorage\n",
"import git\n",
"\n",
"notebook_path = path.abspath(\"\")\n",
"repo_clone_location = path.join(notebook_path, \".data/graphrag\")\n",
"\n",
"LocalStorage.remove_all(repo_clone_location)\n",
"\n",
"git.Repo.clone_from(\n",
" \"git@github.com:microsoft/graphrag.git\",\n",
" Path(repo_clone_location),\n",
" branch=\"main\",\n",
" single_branch=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.tasks.repo_processor import (\n",
" enrich_dependency_graph,\n",
" expand_dependency_graph,\n",
" get_repo_file_dependencies,\n",
")\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"\n",
"tasks = [\n",
" Task(get_repo_file_dependencies),\n",
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
" Task(enrich_dependency_graph, task_config={\"batch_size\": 50}),\n",
" Task(expand_dependency_graph, task_config={\"batch_size\": 50}),\n",
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.modules.pipelines import run_tasks\n",
"from uuid import uuid5, NAMESPACE_OID\n",
"\n",
"notebook_path = os.path.abspath(\"\")\n",
"repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
"\n",
"pipeline = run_tasks(tasks, uuid5(NAMESPACE_OID, repo_clone_location), repo_clone_location, \"code_graph_pipeline\")\n",
"\n",
"async for result in pipeline:\n",
" print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.shared.utils import render_graph\n",
"\n",
"await render_graph(None, include_nodes=True, include_labels=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's check the evaluations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
"from evals.eval_on_hotpot import answer_with_cognee\n",
"from evals.eval_on_hotpot import answer_without_cognee\n",
"from evals.eval_on_hotpot import eval_answers\n",
"from cognee.base_config import get_base_config\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import wget\n",
"import json\n",
"import statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"\n",
"base_config = get_base_config()\n",
"data_root_dir = base_config.data_root_directory\n",
"\n",
"if not Path(data_root_dir).exists():\n",
" Path(data_root_dir).mkdir()\n",
"\n",
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
"if not filepath.exists():\n",
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
" wget.download(url, out=data_root_dir)\n",
"\n",
"with open(filepath, \"r\") as file:\n",
" dataset = json.load(file)\n",
"\n",
"instances = dataset if not num_samples else dataset[:num_samples]\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = answer_provider(instance)\n",
" answers.append(answer)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}