{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "# Credentials are prompted for at run time rather than hard-coded.\n",
    "os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
    "# getpass keeps the typed password from being echoed into the cell output.\n",
    "os.environ[\"GRAPHISTRY_PASSWORD\"] = getpass.getpass(\"Please enter your graphistry password\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "import cognee\n",
    "from cognee.infrastructure.databases.relational import create_db_and_tables\n",
    "\n",
    "# Point cognee's data and system roots at dot-prefixed folders under the\n",
    "# current working directory (os.path.abspath(\"\") resolves to the cwd).\n",
    "notebook_path = os.path.abspath(\"\")\n",
    "data_directory_path = str(\n",
    "    pathlib.Path(os.path.join(notebook_path, \".data_storage/code_graph\")).resolve()\n",
    ")\n",
    "cognee.config.data_root_directory(data_directory_path)\n",
    "cognee_directory_path = str(\n",
    "    pathlib.Path(os.path.join(notebook_path, \".cognee_system/code_graph\")).resolve()\n",
    ")\n",
    "cognee.config.system_root_directory(cognee_directory_path)\n",
    "\n",
    "# Prune data and system state before (re)creating the database tables.\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "\n",
    "await create_db_and_tables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import path\n",
    "from pathlib import Path\n",
    "from cognee.infrastructure.files.storage import LocalStorage\n",
    "import git\n",
    "\n",
    "notebook_path = path.abspath(\"\")\n",
    "repo_clone_location = path.join(notebook_path, \".data/graphrag\")\n",
    "\n",
    "# Clear the clone location first (presumably so that re-running this cell\n",
    "# does not collide with an existing checkout; verify LocalStorage.remove_all).\n",
    "LocalStorage.remove_all(repo_clone_location)\n",
    "\n",
    "# HTTPS is used so the public repository can be cloned without SSH keys.\n",
    "git.Repo.clone_from(\n",
    "    \"https://github.com/microsoft/graphrag.git\",\n",
    "    Path(repo_clone_location),\n",
    "    branch=\"main\",\n",
    "    single_branch=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee.tasks.repo_processor import (\n",
    "    enrich_dependency_graph,\n",
    "    expand_dependency_graph,\n",
    "    get_repo_file_dependencies,\n",
    ")\n",
    "from cognee.tasks.storage import add_data_points\n",
    "from cognee.modules.pipelines.tasks.Task import Task\n",
    "\n",
    "# Tasks handed to run_tasks in the next cell; every task after the first is\n",
    "# configured with a batch size of 50.\n",
    "tasks = [\n",
    "    Task(get_repo_file_dependencies),\n",
    "    Task(add_data_points, task_config={\"batch_size\": 50}),\n",
    "    Task(enrich_dependency_graph, task_config={\"batch_size\": 50}),\n",
    "    Task(expand_dependency_graph, task_config={\"batch_size\": 50}),\n",
    "    Task(add_data_points, task_config={\"batch_size\": 50}),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee.modules.pipelines import run_tasks\n",
    "\n",
    "# Same clone location as the cell that cloned the repository.\n",
    "notebook_path = os.path.abspath(\"\")\n",
    "repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
    "\n",
    "pipeline = run_tasks(tasks, repo_clone_location, \"code_graph_pipeline\")\n",
    "\n",
    "# run_tasks is consumed as an async iterator; print each yielded result.\n",
    "async for result in pipeline:\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee.shared.utils import render_graph\n",
    "\n",
    "await render_graph(None, include_nodes=True, include_labels=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "# Let's check the evaluations"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from evals.eval_on_hotpot import eval_on_hotpotQA\n",
    "from evals.eval_on_hotpot import answer_with_cognee\n",
    "from evals.eval_on_hotpot import answer_without_cognee\n",
    "from evals.eval_on_hotpot import eval_answers\n",
    "from cognee.base_config import get_base_config\n",
    "from pathlib import Path\n",
    "from tqdm import tqdm\n",
    "import wget\n",
    "import json\n",
    "import statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_provider = answer_with_cognee  # For native LLM answers use answer_without_cognee\n",
    "num_samples = 10  # With cognee, it takes ~1m10s per sample\n",
    "\n",
    "base_config = get_base_config()\n",
    "data_root_dir = base_config.data_root_directory\n",
    "\n",
    "# Create the data root (and any missing parents); a no-op if it already exists.\n",
    "Path(data_root_dir).mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Download the HotpotQA dev set once; later runs reuse the local copy.\n",
    "filepath = Path(data_root_dir) / \"hotpot_dev_fullwiki_v1.json\"\n",
    "if not filepath.exists():\n",
    "    url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
    "    wget.download(url, out=data_root_dir)\n",
    "\n",
    "# Explicit encoding so the load does not depend on the platform default.\n",
    "with open(filepath, \"r\", encoding=\"utf-8\") as file:\n",
    "    dataset = json.load(file)\n",
    "\n",
    "# A falsy num_samples (None or 0) means: use the whole dataset.\n",
    "instances = dataset if not num_samples else dataset[:num_samples]\n",
    "answers = []\n",
    "for instance in tqdm(instances, desc=\"Getting answers\"):\n",
    "    # NOTE(review): if answer_provider is a coroutine function this call needs\n",
    "    # `await`; confirm against evals.eval_on_hotpot.\n",
    "    answer = answer_provider(instance)\n",
    "    answers.append(answer)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}