fix: Refactor notebooks (#720)
## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
Parent: 9536395468 · Commit: 228fba8096
8 changed files with 403 additions and 4862 deletions
@@ -1,155 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")\n",
"os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"import cognee\n",
"from cognee.infrastructure.databases.relational import create_db_and_tables\n",
"\n",
"notebook_path = os.path.abspath(\"\")\n",
"data_directory_path = str(\n",
" pathlib.Path(os.path.join(notebook_path, \".data_storage/code_graph\")).resolve()\n",
")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee_directory_path = str(\n",
" pathlib.Path(os.path.join(notebook_path, \".cognee_system/code_graph\")).resolve()\n",
")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)\n",
"\n",
"await create_db_and_tables()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from os import path\n",
"from pathlib import Path\n",
"from cognee.infrastructure.files.storage import LocalStorage\n",
"import git\n",
"\n",
"notebook_path = path.abspath(\"\")\n",
"repo_clone_location = path.join(notebook_path, \".data/graphrag\")\n",
"\n",
"LocalStorage.remove_all(repo_clone_location)\n",
"\n",
"git.Repo.clone_from(\n",
" \"git@github.com:microsoft/graphrag.git\",\n",
" Path(repo_clone_location),\n",
" branch=\"main\",\n",
" single_branch=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.tasks.repo_processor import (\n",
" get_repo_file_dependencies,\n",
")\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"\n",
"detailed_extraction = True\n",
"\n",
"tasks = [\n",
" Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),\n",
" Task(add_data_points, task_config={\"batch_size\": 100 if detailed_extraction else 500}),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.modules.pipelines import run_tasks\n",
"from uuid import uuid5, NAMESPACE_OID\n",
"\n",
"pipeline = run_tasks(tasks, uuid5(NAMESPACE_OID, repo_clone_location), repo_clone_location, \"code_graph_pipeline\")\n",
"\n",
"async for result in pipeline:\n",
" print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.shared.utils import render_graph\n",
"\n",
"await render_graph()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's check the evaluations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee import search, SearchType\n",
"\n",
"results = await search(query_type=SearchType.CODE, query_text=\"def create_graphrag_config\")\n",
"\n",
"print(results)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
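The deleted code_graph notebook above keys its pipeline run with `uuid5(NAMESPACE_OID, repo_clone_location)`, meaning the dataset id is derived deterministically from the clone path rather than generated randomly. A minimal standard-library sketch of why that matters (the path below is a hypothetical example, not taken from the notebook):

```python
from uuid import uuid5, NAMESPACE_OID

# uuid5 is a SHA-1 hash of namespace + name, so the same input string
# always maps to the same UUID across runs and machines.
repo_clone_location = "/tmp/.data/graphrag"  # hypothetical clone path

dataset_id = uuid5(NAMESPACE_OID, repo_clone_location)
assert dataset_id == uuid5(NAMESPACE_OID, repo_clone_location)  # deterministic
print(dataset_id)
```

Re-running the notebook against the same repository therefore addresses the same dataset instead of creating a new one each time.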
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@@ -12,9 +12,7 @@
 "cell_type": "code",
 "id": "982b897a29a26f7d",
 "metadata": {},
-"source": [
-"!pip install cognee==0.1.26"
-],
+"source": "!pip install cognee==0.1.36",
 "outputs": [],
 "execution_count": null
 },
File diff suppressed because one or more lines are too long
@@ -1,977 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d35ac8ce-0f92-46f5-9ba4-a46970f0ce19",
"metadata": {},
"source": [
"# Cognee - Get Started"
]
},
{
"cell_type": "markdown",
"id": "074f0ea8-c659-4736-be26-be4b0e5ac665",
"metadata": {},
"source": [
"# Demo time"
]
},
{
"cell_type": "markdown",
"id": "0587d91d",
"metadata": {},
"source": [
"#### First let's define some data that we will cognify and perform a search on"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df16431d0f48b006",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:48.519686Z",
"start_time": "2024-09-20T14:02:48.515589Z"
}
},
"outputs": [],
"source": [
"job_position = \"\"\"Senior Data Scientist (Machine Learning)\n",
"\n",
"Company: TechNova Solutions\n",
"Location: San Francisco, CA\n",
"\n",
"Job Description:\n",
"\n",
"TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.\n",
"\n",
"Responsibilities:\n",
"\n",
"Develop and implement advanced machine learning algorithms and models.\n",
"Analyze large, complex datasets to extract meaningful patterns and insights.\n",
"Collaborate with cross-functional teams to integrate predictive models into products.\n",
"Stay updated with the latest advancements in machine learning and data science.\n",
"Mentor junior data scientists and provide technical guidance.\n",
"Qualifications:\n",
"\n",
"Master’s or Ph.D. in Data Science, Computer Science, Statistics, or a related field.\n",
"5+ years of experience in data science and machine learning.\n",
"Proficient in Python, R, and SQL.\n",
"Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).\n",
"Strong problem-solving skills and attention to detail.\n",
"Candidate CVs\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9086abf3af077ab4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.120838Z",
"start_time": "2024-09-20T14:02:49.118294Z"
}
},
"outputs": [],
"source": [
"job_1 = \"\"\"\n",
"CV 1: Relevant\n",
"Name: Dr. Emily Carter\n",
"Contact Information:\n",
"\n",
"Email: emily.carter@example.com\n",
"Phone: (555) 123-4567\n",
"Summary:\n",
"\n",
"Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.\n",
"\n",
"Education:\n",
"\n",
"Ph.D. in Computer Science, Stanford University (2014)\n",
"B.S. in Mathematics, University of California, Berkeley (2010)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, InnovateAI Labs (2016 – Present)\n",
"Led a team in developing machine learning models for natural language processing applications.\n",
"Implemented deep learning algorithms that improved prediction accuracy by 25%.\n",
"Collaborated with cross-functional teams to integrate models into cloud-based platforms.\n",
"Data Scientist, DataWave Analytics (2014 – 2016)\n",
"Developed predictive models for customer segmentation and churn analysis.\n",
"Analyzed large datasets using Hadoop and Spark frameworks.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R, SQL\n",
"Machine Learning: TensorFlow, Keras, Scikit-Learn\n",
"Big Data Technologies: Hadoop, Spark\n",
"Data Visualization: Tableau, Matplotlib\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9de0cc07f798b7f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.675003Z",
"start_time": "2024-09-20T14:02:49.671615Z"
}
},
"outputs": [],
"source": [
"job_2 = \"\"\"\n",
"CV 2: Relevant\n",
"Name: Michael Rodriguez\n",
"Contact Information:\n",
"\n",
"Email: michael.rodriguez@example.com\n",
"Phone: (555) 234-5678\n",
"Summary:\n",
"\n",
"Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Data Science, Carnegie Mellon University (2013)\n",
"B.S. in Computer Science, University of Michigan (2011)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, Alpha Analytics (2017 – Present)\n",
"Developed machine learning models to optimize marketing strategies.\n",
"Reduced customer acquisition cost by 15% through predictive modeling.\n",
"Data Scientist, TechInsights (2013 – 2017)\n",
"Analyzed user behavior data to improve product features.\n",
"Implemented A/B testing frameworks to evaluate product changes.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, Java, SQL\n",
"Machine Learning: Scikit-Learn, XGBoost\n",
"Data Visualization: Seaborn, Plotly\n",
"Databases: MySQL, MongoDB\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "185ff1c102d06111",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.286828Z",
"start_time": "2024-09-20T14:02:50.284369Z"
}
},
"outputs": [],
"source": [
"job_3 = \"\"\"\n",
"CV 3: Relevant\n",
"Name: Sarah Nguyen\n",
"Contact Information:\n",
"\n",
"Email: sarah.nguyen@example.com\n",
"Phone: (555) 345-6789\n",
"Summary:\n",
"\n",
"Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Statistics, University of Washington (2014)\n",
"B.S. in Applied Mathematics, University of Texas at Austin (2012)\n",
"Experience:\n",
"\n",
"Data Scientist, QuantumTech (2016 – Present)\n",
"Designed and implemented machine learning algorithms for financial forecasting.\n",
"Improved model efficiency by 20% through algorithm optimization.\n",
"Junior Data Scientist, DataCore Solutions (2014 – 2016)\n",
"Assisted in developing predictive models for supply chain optimization.\n",
"Conducted data cleaning and preprocessing on large datasets.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R\n",
"Machine Learning Frameworks: PyTorch, Scikit-Learn\n",
"Statistical Analysis: SAS, SPSS\n",
"Cloud Platforms: AWS, Azure\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d55ce4c58f8efb67",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.950343Z",
"start_time": "2024-09-20T14:02:50.946378Z"
}
},
"outputs": [],
"source": [
"job_4 = \"\"\"\n",
"CV 4: Not Relevant\n",
"Name: David Thompson\n",
"Contact Information:\n",
"\n",
"Email: david.thompson@example.com\n",
"Phone: (555) 456-7890\n",
"Summary:\n",
"\n",
"Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.\n",
"\n",
"Education:\n",
"\n",
"B.F.A. in Graphic Design, Rhode Island School of Design (2012)\n",
"Experience:\n",
"\n",
"Senior Graphic Designer, CreativeWorks Agency (2015 – Present)\n",
"Led design projects for clients in various industries.\n",
"Created branding materials that increased client engagement by 30%.\n",
"Graphic Designer, Visual Innovations (2012 – 2015)\n",
"Designed marketing collateral, including brochures, logos, and websites.\n",
"Collaborated with the marketing team to develop cohesive brand strategies.\n",
"Skills:\n",
"\n",
"Design Software: Adobe Photoshop, Illustrator, InDesign\n",
"Web Design: HTML, CSS\n",
"Specialties: Branding and Identity, Typography\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ca4ecc32721ad332",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:51.548191Z",
"start_time": "2024-09-20T14:02:51.545520Z"
}
},
"outputs": [],
"source": [
"job_5 = \"\"\"\n",
"CV 5: Not Relevant\n",
"Name: Jessica Miller\n",
"Contact Information:\n",
"\n",
"Email: jessica.miller@example.com\n",
"Phone: (555) 567-8901\n",
"Summary:\n",
"\n",
"Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.\n",
"\n",
"Education:\n",
"\n",
"B.A. in Business Administration, University of Southern California (2010)\n",
"Experience:\n",
"\n",
"Sales Manager, Global Enterprises (2015 – Present)\n",
"Managed a sales team of 15 members, achieving a 20% increase in annual revenue.\n",
"Developed sales strategies that expanded customer base by 25%.\n",
"Sales Representative, Market Leaders Inc. (2010 – 2015)\n",
"Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.\n",
"Skills:\n",
"\n",
"Sales Strategy and Planning\n",
"Team Leadership and Development\n",
"CRM Software: Salesforce, Zoho\n",
"Negotiation and Relationship Building\n",
"\"\"\""
]
},
{
"cell_type": "markdown",
"id": "4415446a",
"metadata": {},
"source": [
"#### Please add the necessary environment information bellow:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bce39dc6",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Setting environment variables\n",
"if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"\n",
"if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"# \"neo4j\" or \"networkx\"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
"# Not needed if using networkx\n",
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
"\n",
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
"\n",
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
"\n",
"# Database name\n",
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
"\n",
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f1a1dbd",
"metadata": {},
"outputs": [],
"source": [
"# Reset the cognee system with the following command:\n",
"\n",
"import cognee\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)"
]
},
{
"cell_type": "markdown",
"id": "383d6971",
"metadata": {},
"source": [
"#### After we have defined and gathered our data let's add it to cognee "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "904df61ba484a8e5",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:54.243987Z",
"start_time": "2024-09-20T14:02:52.498195Z"
}
},
"outputs": [],
"source": [
"import cognee\n",
"\n",
"await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], \"example\")"
]
},
{
"cell_type": "markdown",
"id": "0f15c5b1",
"metadata": {},
"source": [
"#### All good, let's cognify it."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c431fdef4921ae0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:57.925667Z",
"start_time": "2024-09-20T14:02:57.922353Z"
}
},
"outputs": [],
"source": [
"from cognee.shared.data_models import KnowledgeGraph\n",
"from cognee.modules.data.models import Dataset, Data\n",
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
"from cognee.modules.cognify.config import get_cognify_config\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"from cognee.modules.pipelines import run_tasks\n",
"from cognee.modules.users.models import User\n",
"from cognee.tasks.documents import (\n",
" check_permissions_on_documents,\n",
" classify_documents,\n",
" extract_chunks_from_documents,\n",
")\n",
"from cognee.tasks.graph import extract_graph_from_data\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.tasks.summarization import summarize_text\n",
"\n",
"\n",
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
" data_documents: list[Data] = await get_dataset_data(dataset_id=dataset.id)\n",
"\n",
" try:\n",
" cognee_config = get_cognify_config()\n",
"\n",
" tasks = [\n",
" Task(classify_documents),\n",
" Task(check_permissions_on_documents, user=user, permissions=[\"write\"]),\n",
" Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
" Task(\n",
" extract_graph_from_data, graph_model=KnowledgeGraph, task_config={\"batch_size\": 10}\n",
" ), # Generate knowledge graphs from the document chunks.\n",
" Task(\n",
" summarize_text,\n",
" summarization_model=cognee_config.summarization_model,\n",
" task_config={\"batch_size\": 10},\n",
" ),\n",
" Task(add_data_points, task_config={\"batch_size\": 10}),\n",
" ]\n",
"\n",
" pipeline = run_tasks(tasks, data_documents)\n",
"\n",
" async for result in pipeline:\n",
" print(result)\n",
" except Exception as error:\n",
" raise error"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0a91b99c6215e09",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:58.905774Z",
"start_time": "2024-09-20T14:02:58.625915Z"
}
},
"outputs": [],
"source": [
"from cognee.modules.users.methods import get_default_user\n",
"from cognee.modules.data.methods import get_datasets_by_name\n",
"\n",
"user = await get_default_user()\n",
"\n",
"datasets = await get_datasets_by_name([\"example\"], user.id)\n",
"\n",
"await run_cognify_pipeline(datasets[0], user)"
]
},
{
"cell_type": "markdown",
"id": "219a6d41",
"metadata": {},
"source": [
"#### We get the url to the graph on graphistry in the notebook cell bellow, showing nodes and connections made by the cognify process."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "080389e5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from cognee.shared.utils import render_graph\n",
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
"import graphistry\n",
"\n",
"graphistry.login(\n",
" username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
")\n",
"\n",
"graph_engine = await get_graph_engine()\n",
"\n",
"graph_url = await render_graph(graph_engine.graph)\n",
"print(graph_url)"
]
},
{
"cell_type": "markdown",
"id": "59e6c3c3",
"metadata": {},
"source": [
"#### We can also do a search on the data to explore the knowledge."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5e7dfc8",
"metadata": {},
"outputs": [],
"source": [
"async def search(\n",
" vector_engine,\n",
" collection_name: str,\n",
" query_text: str = None,\n",
"):\n",
" query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]\n",
"\n",
" connection = await vector_engine.get_connection()\n",
" collection = await connection.open_table(collection_name)\n",
"\n",
" results = await collection.vector_search(query_vector).limit(10).to_pandas()\n",
"\n",
" result_values = list(results.to_dict(\"index\").values())\n",
"\n",
" return [\n",
" dict(\n",
" id=str(result[\"id\"]),\n",
" payload=result[\"payload\"],\n",
" score=result[\"_distance\"],\n",
" )\n",
" for result in result_values\n",
" ]\n",
"\n",
"\n",
"from cognee.infrastructure.databases.vector import get_vector_engine\n",
"\n",
"vector_engine = get_vector_engine()\n",
"results = await search(vector_engine, \"Entity_name\", \"sarah.nguyen@example.com\")\n",
"for result in results:\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"id": "81fa2b00",
"metadata": {},
"source": [
"#### We normalize search output scores so the lower the score of the search result is the higher the chance that it's what you're looking for. In the example above we have searched for node entities in the knowledge graph related to \"sarah.nguyen@example.com\""
]
},
{
"cell_type": "markdown",
"id": "1b94ff96",
"metadata": {},
"source": [
"#### In the example bellow we'll use cognee search to summarize information regarding the node most related to \"sarah.nguyen@example.com\" in the knowledge graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21a3e9a6",
"metadata": {},
"outputs": [],
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
"node = (await vector_engine.search(\"Entity_name\", \"sarah.nguyen@example.com\"))[0]\n",
"node_name = node.payload[\"text\"]\n",
"\n",
"search_results = await cognee.search(query_type=SearchType.SUMMARIES, query_text=node_name)\n",
"print(\"\\n\\Extracted summaries are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "fd6e5fe2",
"metadata": {},
"source": [
"#### In this example we'll use cognee search to find chunks in which the node most related to \"sarah.nguyen@example.com\" is a part of"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7a8abff",
"metadata": {},
"outputs": [],
"source": [
"search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=node_name)\n",
"print(\"\\n\\nExtracted chunks are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "47f0112f",
"metadata": {},
"source": [
"#### In this example we'll use cognee search to give us insights from the knowledge graph related to the node most related to \"sarah.nguyen@example.com\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "706a3954",
"metadata": {},
"outputs": [],
"source": [
"search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=node_name)\n",
"print(\"\\n\\nExtracted sentences are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "e519e30c0423c2a",
"metadata": {},
"source": [
"## Let's add evals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3845443e",
"metadata": {},
"outputs": [],
"source": [
"!pip install \"cognee[deepeval]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2c3c70",
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import deepeval_answers, answer_qa_instance\n",
"from evals.qa_dataset_utils import load_qa_dataset\n",
"from evals.qa_metrics_utils import get_metrics\n",
"from evals.qa_context_provider_utils import qa_context_providers\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import statistics\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a609d8",
"metadata": {},
"outputs": [],
"source": [
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"dataset_name_or_filename = \"hotpotqa\"\n",
"dataset = load_qa_dataset(dataset_name_or_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7351ab8f",
"metadata": {},
"outputs": [],
"source": [
"context_provider_name = \"cognee\"\n",
"context_provider = qa_context_providers[context_provider_name]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9346115b",
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)\n",
"instances = dataset if not num_samples else random.sample(dataset, num_samples)\n",
"\n",
"out_path = \"out\"\n",
"if not Path(out_path).exists():\n",
" Path(out_path).mkdir()\n",
"contexts_filename = out_path / Path(\n",
" f\"contexts_{dataset_name_or_filename.split('.')[0]}_{context_provider_name}.json\"\n",
")\n",
"\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = await answer_qa_instance(instance, context_provider, contexts_filename)\n",
" answers.append(answer)"
]
},
{
"cell_type": "markdown",
"id": "1e7d872d",
"metadata": {},
"source": [
"#### Define Metrics for Evaluation and Calculate Score\n",
"**Options**: \n",
"- **Correctness**: Is the actual output factually correct based on the expected output?\n",
"- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?\n",
"- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?\n",
"- **Empowerment**: How well does the answer help the reader understand and make informed judgements about the topic?\n",
"- **Directness**: How specifically and clearly does the answer address the question?\n",
"- **F1 Score**: the harmonic mean of the precision and recall, using word-level Exact Match\n",
"- **EM Score**: the rate at which the predicted strings exactly match their references, ignoring white spaces and capitalization."
]
},
{
"cell_type": "markdown",
"id": "c81e2b46",
"metadata": {},
"source": [
"##### Calculate `\"Correctness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae728344",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Correctness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "764aac6d",
"metadata": {},
"outputs": [],
"source": [
"Correctness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Correctness)"
]
},
{
"cell_type": "markdown",
"id": "6d3bbdc5",
"metadata": {},
"source": [
"##### Calculating `\"Comprehensiveness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9793ef78",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Comprehensiveness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9add448a",
"metadata": {},
"outputs": [],
"source": [
"Comprehensiveness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Comprehensiveness)"
]
},
{
"cell_type": "markdown",
"id": "bce2fa25",
"metadata": {},
"source": [
"##### Calculating `\"Diversity\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f60a179e",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Diversity\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ccbd0ab",
"metadata": {},
"outputs": [],
"source": [
"Diversity = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(Diversity)"
]
},
{
"cell_type": "markdown",
"id": "191cab63",
"metadata": {},
"source": [
"##### Calculating`\"Empowerment\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66bec0bf",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Empowerment\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b043a8f",
"metadata": {},
"outputs": [],
"source": [
"Empowerment = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Empowerment)"
]
},
{
"cell_type": "markdown",
"id": "2cac3be9",
"metadata": {},
"source": [
"##### Calculating `\"Directness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adaa17c0",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Directness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a8f97c9",
"metadata": {},
"outputs": [],
"source": [
"Directness = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(Directness)"
]
},
{
"cell_type": "markdown",
"id": "1ad6feb8",
"metadata": {},
"source": [
"##### Calculating `\"F1\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdc48259",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"F1\"]\n",
"eval_metrics = get_metrics(metric_name_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c43c17c8",
"metadata": {},
"outputs": [],
"source": [
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bfcc46d",
"metadata": {},
"outputs": [],
"source": [
"F1_score = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(F1_score)"
]
},
{
"cell_type": "markdown",
"id": "2583f948",
"metadata": {},
"source": [
"##### Calculating `\"EM\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90a8f630",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"EM\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d1b1ea1",
"metadata": {},
"outputs": [],
"source": [
"EM = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(EM)"
]
},
{
"cell_type": "markdown",
"id": "288ab570",
"metadata": {},
"source": [
"# Give us a star if you like it!\n",
"https://github.com/topoteretes/cognee"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cognee-c83GrcRT-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
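The metric descriptions in the deleted notebook above define F1 as the harmonic mean of word-level precision and recall, and EM as an exact string match ignoring whitespace and capitalization. Here is a self-contained sketch of those two definitions; the tokenization and normalization details are an assumption, not necessarily what `evals.qa_metrics_utils.get_metrics` does internally:

```python
from collections import Counter

def f1_em(prediction: str, reference: str) -> tuple[float, float]:
    # Normalize per the EM definition above: lowercase, collapse whitespace.
    pred_tokens = prediction.lower().split()
    ref_tokens = reference.lower().split()
    em = float(pred_tokens == ref_tokens)

    # Word-level overlap counts each shared token at most min(count) times.
    overlap = sum((Counter(pred_tokens) & Counter(ref_tokens)).values())
    if overlap == 0:
        return 0.0, em
    precision = overlap / len(pred_tokens)
    recall = overlap / len(ref_tokens)
    return 2 * precision * recall / (precision + recall), em

# Example: partial word overlap gives F1 = 0.75 but EM = 0.0.
print(f1_em("Senior Data Scientist at TechNova", "senior data scientist"))
```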
File diff suppressed because one or more lines are too long
@@ -1,536 +0,0 @@
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:00.193158Z",
"start_time": "2025-03-04T11:58:00.190238Z"
}
},
"cell_type": "code",
"source": [
"import nest_asyncio\n",
"nest_asyncio.apply()"
],
"id": "2efba278d106bb5f",
"outputs": [],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"### Environment Configuration\n",
"#### Setup required directories and environment variables.\n"
],
"id": "ccbb2bc23aa456ee"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:33.879188Z",
"start_time": "2025-03-04T11:59:33.873682Z"
}
},
"cell_type": "code",
"source": [
"import pathlib\n",
"import os\n",
"import cognee\n",
"\n",
"notebook_dir = pathlib.Path().resolve()\n",
"data_directory_path = str(notebook_dir / \".data_storage\")\n",
"cognee_directory_path = str(notebook_dir / \".cognee_system\")\n",
"\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"BASE_URL = \"https://pokeapi.co/api/v2/\"\n",
"os.environ[\"BUCKET_URL\"] = data_directory_path\n",
"os.environ[\"DATA_WRITER__DISABLE_COMPRESSION\"] = \"true\"\n"
],
"id": "662d554f96f211d9",
"outputs": [],
"execution_count": 8
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Initialize DLT Pipeline\n",
"### Create the DLT pipeline to fetch Pokémon data.\n"
],
"id": "36ae0be71f6e9167"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.982939Z",
"start_time": "2025-03-04T11:58:03.819676Z"
}
},
"cell_type": "code",
"source": [
"import dlt\n",
"from pathlib import Path\n",
"\n",
"pipeline = dlt.pipeline(\n",
" pipeline_name=\"pokemon_pipeline\",\n",
" destination=\"filesystem\",\n",
" dataset_name=\"pokemon_data\",\n",
")\n"
],
"id": "25101ae5f016ce0c",
"outputs": [],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Fetch Pokémon List\n",
"### Retrieve a list of Pokémon from the API.\n"
],
"id": "9a87ce05a072c48b"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.990076Z",
"start_time": "2025-03-04T11:58:03.987199Z"
}
},
"cell_type": "code",
"source": [
"@dlt.resource(write_disposition=\"replace\")\n",
"def pokemon_list(limit: int = 50):\n",
" import requests\n",
" response = requests.get(f\"{BASE_URL}pokemon\", params={\"limit\": limit})\n",
" response.raise_for_status()\n",
" yield response.json()[\"results\"]\n"
],
"id": "3b6e60778c61e24a",
"outputs": [],
"execution_count": 5
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Fetch Pokémon Details\n",
"### Fetch detailed information about each Pokémon.\n"
],
"id": "9952767846194e97"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.996394Z",
"start_time": "2025-03-04T11:58:03.994122Z"
}
},
"cell_type": "code",
"source": [
"@dlt.transformer(data_from=pokemon_list)\n",
"def pokemon_details(pokemons):\n",
" \"\"\"Fetches detailed info for each Pokémon\"\"\"\n",
" import requests\n",
" for pokemon in pokemons:\n",
" response = requests.get(pokemon[\"url\"])\n",
" response.raise_for_status()\n",
" yield response.json()\n"
],
"id": "79ec9fef12267485",
"outputs": [],
"execution_count": 6
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Run Data Pipeline\n",
"### Execute the pipeline and store Pokémon data.\n"
],
"id": "41e05f660bf9e9d2"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:41.571015Z",
"start_time": "2025-03-04T11:59:36.840744Z"
}
},
"cell_type": "code",
"source": [
"info = pipeline.run([pokemon_list, pokemon_details])\n",
"print(info)\n"
],
"id": "20a3b2c7f404677f",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pipeline pokemon_pipeline load step completed in 0.06 seconds\n",
"1 load package(s) were loaded to destination filesystem and into dataset pokemon_data\n",
"The filesystem destination used file:///Users/lazar/PycharmProjects/cognee/.data_storage location to store data\n",
"Load package 1741089576.860229 is LOADED and contains no failed jobs\n"
]
}
],
"execution_count": 9
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Load Pokémon Abilities\n",
"### Load Pokémon ability data from stored files.\n"
],
"id": "937f10b8d1037743"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:44.377719Z",
"start_time": "2025-03-04T11:59:44.363718Z"
}
},
"cell_type": "code",
"source": [
"import json\n",
"from cognee.low_level import DataPoint\n",
"from uuid import uuid5, NAMESPACE_OID\n",
"\n",
"class Abilities(DataPoint):\n",
" name: str = \"Abilities\"\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"def load_abilities_data(jsonl_abilities):\n",
" abilities_root = Abilities()\n",
" pokemon_abilities = []\n",
"\n",
" for jsonl_ability in jsonl_abilities:\n",
" with open(jsonl_ability, \"r\") as f:\n",
" for line in f:\n",
" ability = json.loads(line)\n",
" ability[\"id\"] = uuid5(NAMESPACE_OID, ability[\"_dlt_id\"])\n",
" ability[\"name\"] = ability[\"ability__name\"]\n",
" ability[\"is_type\"] = abilities_root\n",
" pokemon_abilities.append(ability)\n",
"\n",
" return abilities_root, pokemon_abilities\n"
],
"id": "be73050036439ea1",
"outputs": [],
"execution_count": 10
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Load Pokémon Data\n",
"### Load Pokémon details and associate them with abilities.\n"
],
"id": "98c97f799f73df77"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:46.251306Z",
"start_time": "2025-03-04T11:59:46.238283Z"
}
},
"cell_type": "code",
"source": [
"from typing import List, Optional\n",
"\n",
"class Pokemons(DataPoint):\n",
" name: str = \"Pokemons\"\n",
" have: Abilities\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"class PokemonAbility(DataPoint):\n",
" name: str\n",
" ability__name: str\n",
" ability__url: str\n",
" is_hidden: bool\n",
" slot: int\n",
" _dlt_load_id: str\n",
" _dlt_id: str\n",
" _dlt_parent_id: str\n",
" _dlt_list_idx: str\n",
" is_type: Abilities\n",
" metadata: dict = {\"index_fields\": [\"ability__name\"]}\n",
"\n",
"class Pokemon(DataPoint):\n",
" name: str\n",
" base_experience: int\n",
" height: int\n",
" weight: int\n",
" is_default: bool\n",
" order: int\n",
" location_area_encounters: str\n",
" species__name: str\n",
" species__url: str\n",
" cries__latest: str\n",
" cries__legacy: str\n",
" sprites__front_default: str\n",
" sprites__front_shiny: str\n",
" sprites__back_default: Optional[str]\n",
" sprites__back_shiny: Optional[str]\n",
" _dlt_load_id: str\n",
" _dlt_id: str\n",
" is_type: Pokemons\n",
" abilities: List[PokemonAbility]\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):\n",
" pokemons = []\n",
"\n",
" for jsonl_pokemon in jsonl_pokemons:\n",
" with open(jsonl_pokemon, \"r\") as f:\n",
" for line in f:\n",
" pokemon_data = json.loads(line)\n",
" abilities = [\n",
" ability for ability in pokemon_abilities\n",
" if ability[\"_dlt_parent_id\"] == pokemon_data[\"_dlt_id\"]\n",
" ]\n",
" pokemon_data[\"external_id\"] = pokemon_data[\"id\"]\n",
" pokemon_data[\"id\"] = uuid5(NAMESPACE_OID, str(pokemon_data[\"id\"]))\n",
" pokemon_data[\"abilities\"] = [PokemonAbility(**ability) for ability in abilities]\n",
" pokemon_data[\"is_type\"] = pokemon_root\n",
" pokemons.append(Pokemon(**pokemon_data))\n",
"\n",
" return pokemons\n"
],
"id": "7862951248df0bf5",
"outputs": [],
"execution_count": 11
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Process Pokémon Data\n",
"### Load and associate Pokémon abilities.\n"
],
"id": "676fa5a2b61c2107"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:47.365226Z",
"start_time": "2025-03-04T11:59:47.356722Z"
}
},
"cell_type": "code",
"source": [
"STORAGE_PATH = Path(\".data_storage/pokemon_data/pokemon_details\")\n",
"jsonl_pokemons = sorted(STORAGE_PATH.glob(\"*.jsonl\"))\n",
"\n",
"ABILITIES_PATH = Path(\".data_storage/pokemon_data/pokemon_details__abilities\")\n",
"jsonl_abilities = sorted(ABILITIES_PATH.glob(\"*.jsonl\"))\n",
"\n",
"abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)\n",
"pokemon_root = Pokemons(have=abilities_root)\n",
"pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)\n"
],
"id": "ad14cdecdccd71bb",
"outputs": [],
"execution_count": 12
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Initialize Cognee\n",
"### Setup Cognee for data processing.\n"
],
"id": "59dec67b2ae50f0f"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:49.244577Z",
"start_time": "2025-03-04T11:59:48.618261Z"
}
},
"cell_type": "code",
"source": [
"import asyncio\n",
"from cognee.low_level import setup as cognee_setup\n",
"\n",
"async def initialize_cognee():\n",
" await cognee.prune.prune_data()\n",
" await cognee.prune.prune_system(metadata=True)\n",
" await cognee_setup()\n",
"\n",
"await initialize_cognee()\n"
],
"id": "d2e095ae576a02c1",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully."
]
}
],
"execution_count": 13
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Process Pokémon Data\n",
"### Add Pokémon data points to Cognee.\n"
],
"id": "5f0b8090bc7b1fe6"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:57.744035Z",
"start_time": "2025-03-04T11:59:50.574033Z"
}
},
"cell_type": "code",
"source": [
"from cognee.modules.pipelines.tasks.Task import Task\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.modules.pipelines import run_tasks\n",
"\n",
"tasks = [Task(add_data_points, task_config={\"batch_size\": 50})]\n",
"results = run_tasks(\n",
" tasks=tasks,\n",
" data=pokemons,\n",
" dataset_id=uuid5(NAMESPACE_OID, \"Pokemon\"),\n",
" pipeline_name='pokemon_pipeline',\n",
")\n",
"\n",
"async for result in results:\n",
" print(result)\n",
"print(\"Done\")\n"
],
"id": "ffa12fc1f5350d95",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:run_tasks(tasks: [Task], data):Pipeline run started: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`INFO:run_tasks(tasks: [Task], data):Coroutine task started: `add_data_points`"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x300bb3950>\n",
"User d347ea85-e512-4cae-b9d7-496fe1745424 has registered.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:79: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
" class PGVectorDataPoint(Base):\n",
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:113: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
" class PGVectorDataPoint(Base):\n",
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 8, column: 16, offset: 335} for query: '\\n UNWIND $nodes AS node\\n MERGE (n {id: node.node_id})\\n ON CREATE SET n += node.properties, n.updated_at = timestamp()\\n ON MATCH SET n += node.properties, n.updated_at = timestamp()\\n WITH n, node.node_id AS label\\n CALL apoc.create.addLabels(n, [label]) YIELD node AS labeledNode\\n RETURN ID(labeledNode) AS internal_id, labeledNode.id AS nodeId\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:run_tasks(tasks: [Task], data):Coroutine task completed: `add_data_points`INFO:run_tasks(tasks: [Task], data):Pipeline run completed: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x30016fd40>\n",
"Done\n"
]
}
],
"execution_count": 14
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Search Pokémon Data\n",
"### Execute a search query using Cognee.\n"
],
"id": "e0d98d9832a2797a"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T12:00:02.878871Z",
"start_time": "2025-03-04T11:59:59.571965Z"
}
},
"cell_type": "code",
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
"search_results = await cognee.search(\n",
" query_type=SearchType.GRAPH_COMPLETION,\n",
" query_text=\"pokemons?\"\n",
")\n",
"\n",
"print(\"Search results:\")\n",
"for result_text in search_results:\n",
" print(result_text)"
],
"id": "bb2476b6b0c2aff",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\u001B[92m13:00:02 - LiteLLM:INFO\u001B[0m: utils.py:2784 - \n",
"LiteLLM completion() model= gpt-4o-mini; provider = openaiINFO:LiteLLM:\n",
"LiteLLM completion() model= gpt-4o-mini; provider = openai"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Search results:\n",
"The Pokemons mentioned are: golbat, jigglypuff, raichu, vulpix, and pikachu.\n"
]
}
],
"execution_count": 15
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "a4c2d3e9c15b017"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
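The Pokémon notebook above builds its graph with cognee's low-level `DataPoint` models: a field annotated with another `DataPoint` subclass (`have: Abilities`, `is_type: Pokemons`) becomes an edge when `add_data_points` runs, and `metadata["index_fields"]` names the attributes that get embedded for vector search. A minimal sketch of the same pattern, with hypothetical `Trainers`/`Trainer` types that are not part of the notebook:

```python
from uuid import uuid5, NAMESPACE_OID
from cognee.low_level import DataPoint

class Trainers(DataPoint):
    # Root node that individual trainers point at via is_type.
    name: str = "Trainers"
    metadata: dict = {"index_fields": ["name"]}

class Trainer(DataPoint):
    name: str
    is_type: Trainers  # DataPoint-typed field -> edge in the graph
    metadata: dict = {"index_fields": ["name"]}  # embed `name` for search

trainers_root = Trainers()
ash = Trainer(
    id=uuid5(NAMESPACE_OID, "ash"),  # deterministic id, as in the notebook
    name="Ash",
    is_type=trainers_root,
)
```

Feeding such instances to `Task(add_data_points)` via `run_tasks`, as the notebook does with its `Pokemon` list, would persist both the nodes and the `is_type` edges.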