<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Enhanced pipeline execution now provides consolidated status feedback with improved telemetry for start, completion, and error events. - Automatic generation of unique dataset identifiers offers clearer task and pipeline run associations. - **Refactor** - Task execution has been streamlined with explicit parameter handling for more structured pipeline processing. - Interactive examples and demos now return results directly, making integration and monitoring more accessible. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
195 lines
5.6 KiB
Text
195 lines
5.6 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"from getpass import getpass\n",
|
|
"\n",
|
|
"# Prompt for the Graphistry credentials and export them as environment variables.\n",
|
|
"os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
|
|
"# getpass keeps the password from being echoed into the notebook output.\n",
|
|
"os.environ[\"GRAPHISTRY_PASSWORD\"] = getpass(\"Please enter your graphistry password\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import pathlib\n",
|
|
"import cognee\n",
|
|
"from cognee.infrastructure.databases.relational import create_db_and_tables\n",
|
|
"\n",
|
|
"# abspath(\"\") resolves to the current working directory.\n",
|
|
"notebook_path = os.path.abspath(\"\")\n",
|
|
"\n",
|
|
"# Resolve both storage locations before handing them to cognee.\n",
|
|
"data_storage_dir = pathlib.Path(os.path.join(notebook_path, \".data_storage/code_graph\"))\n",
|
|
"data_directory_path = str(data_storage_dir.resolve())\n",
|
|
"cognee_system_dir = pathlib.Path(os.path.join(notebook_path, \".cognee_system/code_graph\"))\n",
|
|
"cognee_directory_path = str(cognee_system_dir.resolve())\n",
|
|
"\n",
|
|
"cognee.config.data_root_directory(data_directory_path)\n",
|
|
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
|
"\n",
|
|
"# Run cognee's prune steps, then create the relational tables.\n",
|
|
"await cognee.prune.prune_data()\n",
|
|
"await cognee.prune.prune_system(metadata=True)\n",
|
|
"await create_db_and_tables()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from os import path\n",
|
|
"from pathlib import Path\n",
|
|
"from cognee.infrastructure.files.storage import LocalStorage\n",
|
|
"import git\n",
|
|
"\n",
|
|
"notebook_path = path.abspath(\"\")\n",
|
|
"repo_clone_location = path.join(notebook_path, \".data/graphrag\")\n",
|
|
"\n",
|
|
"# Remove any previous checkout first (presumably remove_all deletes the directory tree - confirm).\n",
|
|
"LocalStorage.remove_all(repo_clone_location)\n",
|
|
"\n",
|
|
"# Clone over HTTPS: the repository is public, and the SSH form\n",
|
|
"# (git@github.com:...) fails for anyone without a GitHub SSH key configured.\n",
|
|
"git.Repo.clone_from(\n",
|
|
"    \"https://github.com/microsoft/graphrag.git\",\n",
|
|
"    Path(repo_clone_location),\n",
|
|
"    branch=\"main\",\n",
|
|
"    single_branch=True,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from cognee.tasks.repo_processor import (\n",
|
|
" enrich_dependency_graph,\n",
|
|
" expand_dependency_graph,\n",
|
|
" get_repo_file_dependencies,\n",
|
|
")\n",
|
|
"from cognee.tasks.storage import add_data_points\n",
|
|
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
|
"\n",
|
|
"# Every step after the first uses the same batch_size task_config.\n",
|
|
"batched_steps = (\n",
|
|
"    add_data_points,\n",
|
|
"    enrich_dependency_graph,\n",
|
|
"    expand_dependency_graph,\n",
|
|
"    add_data_points,\n",
|
|
")\n",
|
|
"\n",
|
|
"tasks = [Task(get_repo_file_dependencies)]\n",
|
|
"tasks += [Task(step, task_config={\"batch_size\": 50}) for step in batched_steps]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from cognee.modules.pipelines import run_tasks\n",
|
|
"from uuid import uuid5, NAMESPACE_OID\n",
|
|
"\n",
|
|
"notebook_path = os.path.abspath(\"\")\n",
|
|
"repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
|
|
"\n",
|
|
"# uuid5 is name-based, so the same clone path always produces the same id.\n",
|
|
"location_uuid = uuid5(NAMESPACE_OID, repo_clone_location)\n",
|
|
"pipeline = run_tasks(\n",
|
|
"    tasks,\n",
|
|
"    location_uuid,\n",
|
|
"    repo_clone_location,\n",
|
|
"    \"code_graph_pipeline\",\n",
|
|
")\n",
|
|
"\n",
|
|
"# Print each item the pipeline yields.\n",
|
|
"async for result in pipeline:\n",
|
|
"    print(result)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from cognee.shared.utils import render_graph\n",
|
|
"\n",
|
|
"# Nodes and labels are both included in the render.\n",
|
|
"await render_graph(None, include_labels=True, include_nodes=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Let's check the evaluations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
|
|
"from evals.eval_on_hotpot import answer_with_cognee\n",
|
|
"from evals.eval_on_hotpot import answer_without_cognee\n",
|
|
"from evals.eval_on_hotpot import eval_answers\n",
|
|
"from cognee.base_config import get_base_config\n",
|
|
"from pathlib import Path\n",
|
|
"from tqdm import tqdm\n",
|
|
"import wget\n",
|
|
"import json\n",
|
|
"import statistics"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"answer_provider = answer_with_cognee  # For native LLM answers use answer_without_cognee\n",
|
|
"num_samples = 10  # With cognee, it takes ~1m10s per sample\n",
|
|
"\n",
|
|
"base_config = get_base_config()\n",
|
|
"data_root_dir = base_config.data_root_directory\n",
|
|
"\n",
|
|
"# Create the data directory (and any missing parents); a no-op if it already exists.\n",
|
|
"data_root_path = Path(data_root_dir)\n",
|
|
"data_root_path.mkdir(parents=True, exist_ok=True)\n",
|
|
"\n",
|
|
"# Download the HotpotQA dev set only when it is not already on disk.\n",
|
|
"filepath = data_root_path / \"hotpot_dev_fullwiki_v1.json\"\n",
|
|
"if not filepath.exists():\n",
|
|
"    url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
|
|
"    wget.download(url, out=data_root_dir)\n",
|
|
"\n",
|
|
"# JSON is UTF-8; be explicit so the platform default encoding is never used.\n",
|
|
"with open(filepath, \"r\", encoding=\"utf-8\") as file:\n",
|
|
"    dataset = json.load(file)\n",
|
|
"\n",
|
|
"# A falsy num_samples (0 or None) means \"use the whole dataset\".\n",
|
|
"instances = dataset[:num_samples] if num_samples else dataset\n",
|
|
"answers = []\n",
|
|
"# NOTE(review): if answer_provider is a coroutine function this call needs `await` - confirm.\n",
|
|
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
|
|
"    answer = answer_provider(instance)\n",
|
|
"    answers.append(answer)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|