fix: notebooks errors (#565)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
- Automatically creates a blank graph when a file isn’t found, ensuring
smoother operations.
- Updated demonstration notebooks with dynamic configurations, including
refined search operations and input prompts.
- Introduced optional support for additional graph functionalities via
an integrated dependency.

- **Refactor**
- Streamlined processing by eliminating duplicate steps and simplifying
graph rendering workflows.

- **Chores**
- Updated environment configurations and upgraded the Python runtime for
improved performance and consistency.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Boris 2025-02-19 23:07:11 +01:00 committed by GitHub
parent 811e932cae
commit 45f7c63322
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 422 additions and 451 deletions

View file

@ -243,6 +243,15 @@ class NetworkXAdapter(GraphDBInterface):
await self.save_graph_to_file(self.filename)
async def create_empty_graph(self, file_path: str) -> None:
self.graph = nx.MultiDiGraph()
file_dir = os.path.dirname(file_path)
if not os.path.exists(file_dir):
os.makedirs(file_dir, exist_ok=True)
await self.save_graph_to_file(file_path)
async def save_graph_to_file(self, file_path: str = None) -> None:
"""Asynchronously save the graph to a file in JSON format."""
if not file_path:
@ -322,19 +331,12 @@ class NetworkXAdapter(GraphDBInterface):
else:
# Log that the file does not exist and an empty graph is initialized
logger.warning("File %s not found. Initializing an empty graph.", file_path)
self.graph = (
nx.MultiDiGraph()
) # Use MultiDiGraph to keep it consistent with __init__
await self.create_empty_graph(file_path)
file_dir = os.path.dirname(file_path)
if not os.path.exists(file_dir):
os.makedirs(file_dir, exist_ok=True)
await self.save_graph_to_file(file_path)
except Exception as e:
except Exception:
logger.error("Failed to load graph from file: %s", file_path)
raise e
await self.create_empty_graph(file_path)
async def delete_graph(self, file_path: str = None):
"""Asynchronously delete the graph file from the filesystem."""

View file

@ -9,7 +9,8 @@
"import os\n",
"\n",
"os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")"
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")\n",
"os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key\")"
]
},
{
@ -70,19 +71,16 @@
"outputs": [],
"source": [
"from cognee.tasks.repo_processor import (\n",
" enrich_dependency_graph,\n",
" expand_dependency_graph,\n",
" get_repo_file_dependencies,\n",
")\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"\n",
"detailed_extraction = True\n",
"\n",
"tasks = [\n",
" Task(get_repo_file_dependencies),\n",
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
" Task(enrich_dependency_graph, task_config={\"batch_size\": 50}),\n",
" Task(expand_dependency_graph, task_config={\"batch_size\": 50}),\n",
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
" Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),\n",
" Task(add_data_points, task_config={\"batch_size\": 100 if detailed_extraction else 500}),\n",
"]"
]
},
@ -95,9 +93,6 @@
"from cognee.modules.pipelines import run_tasks\n",
"from uuid import uuid5, NAMESPACE_OID\n",
"\n",
"notebook_path = os.path.abspath(\"\")\n",
"repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
"\n",
"pipeline = run_tasks(tasks, uuid5(NAMESPACE_OID, repo_clone_location), repo_clone_location, \"code_graph_pipeline\")\n",
"\n",
"async for result in pipeline:\n",
@ -112,7 +107,7 @@
"source": [
"from cognee.shared.utils import render_graph\n",
"\n",
"await render_graph(None, include_nodes=True, include_labels=True)"
"await render_graph()"
]
},
{
@ -128,46 +123,11 @@
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
"from evals.eval_on_hotpot import answer_with_cognee\n",
"from evals.eval_on_hotpot import answer_without_cognee\n",
"from evals.eval_on_hotpot import eval_answers\n",
"from cognee.base_config import get_base_config\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import wget\n",
"import json\n",
"import statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"from cognee import search, SearchType\n",
"\n",
"base_config = get_base_config()\n",
"data_root_dir = base_config.data_root_directory\n",
"results = await search(query_type=SearchType.CODE, query_text=\"def create_graphrag_config\")\n",
"\n",
"if not Path(data_root_dir).exists():\n",
" Path(data_root_dir).mkdir()\n",
"\n",
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
"if not filepath.exists():\n",
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
" wget.download(url, out=data_root_dir)\n",
"\n",
"with open(filepath, \"r\") as file:\n",
" dataset = json.load(file)\n",
"\n",
"instances = dataset if not num_samples else dataset[:num_samples]\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = answer_provider(instance)\n",
" answers.append(answer)"
"print(results)\n"
]
}
],

View file

@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:43:57.893763Z",
@ -73,22 +73,15 @@
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# Graphiti integration is only tested with neo4j + pgvector + postgres for now\n",
"GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
"GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
"GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
"GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
"GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
"\n",
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"pgvector\"\n",
"\n",
"os.environ[\"DB_PROVIDER\"] = \"postgres\"\n",
"\n",
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
"\n",
"os.environ[\"DB_HOST\"] = \"127.0.0.1\"\n",
"os.environ[\"DB_PORT\"] = \"5432\"\n",
"os.environ[\"DB_USERNAME\"] = \"cognee\"\n",
"os.environ[\"DB_PASSWORD\"] = \"cognee\""
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
"os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
"os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
"os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
]
},
{
@ -100,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:43:57.928664Z",
@ -125,36 +118,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:44:25.008501Z",
"start_time": "2025-01-15T10:43:57.932240Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Database deleted successfully.\n",
"Database deleted successfully.\n",
"User d3b51a32-38e1-4fe5-8270-6dc1d6ebfdf0 has registered.\n",
"Pipeline file_load_from_filesystem load step completed in 0.10 seconds\n",
"1 load package(s) were loaded to destination sqlalchemy and into dataset public\n",
"The sqlalchemy destination used postgresql://cognee:***@127.0.0.1:5432/cognee_db location to store data\n",
"Load package 1736937839.7739599 is LOADED and contains no failed jobs\n",
"Pipeline file_load_from_filesystem load step completed in 0.06 seconds\n",
"1 load package(s) were loaded to destination sqlalchemy and into dataset public\n",
"The sqlalchemy destination used postgresql://cognee:***@127.0.0.1:5432/cognee_db location to store data\n",
"Load package 1736937841.8467042 is LOADED and contains no failed jobs\n",
"Graph database initialized.\n",
"Added text: Kamala Harris is the Attorney Gener...\n",
"Added text: As AG, Harris was in office from Ja...\n",
"✅ Result Processed: <graphiti_core.graphiti.Graphiti object at 0x326fe0ce0>\n"
]
}
],
"outputs": [],
"source": [
"# 🔧 Setting Up Logging to Suppress Errors\n",
"setup_logging(logging.ERROR) # Keeping logs clean and focused\n",
@ -202,15 +173,7 @@
"start_time": "2025-01-15T10:44:25.013325Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💡 Answer: Kamala Harris was in office as Attorney General of California from January 3, 2011, to January 3, 2017.\n"
]
}
],
"outputs": [],
"source": [
"# Step 1: Formulating the Query 🔍\n",
"query = \"When was Kamala Harris in office?\"\n",
@ -260,7 +223,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.11.8"
}
},
"nbformat": 4,

728
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -86,6 +86,7 @@ plotly = {version = "^6.0.0", optional = true}
gdown = {version = "^5.2.0", optional = true}
pyside6 = {version = "^6.8.2.1", optional = true}
qasync = {version = "^0.27.1", optional = true}
graphiti-core = {version = "^0.7.0", optional = true}
[tool.poetry.extras]
@ -109,6 +110,7 @@ docs = ["unstructured"]
codegraph = ["fastembed", "tree-sitter", "tree-sitter-python"]
evals = ["plotly", "gdown"]
gui = ["pyside6", "qasync"]
graphiti = ["graphiti-core"]
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"