From 9fe1b6c5faafc58163e8aaf91a0f2a5b53ef57fc Mon Sep 17 00:00:00 2001
From: Leon Luithlen
Date: Mon, 11 Nov 2024 13:03:50 +0100
Subject: [PATCH] Add code_graph_demo notebook

Add notebooks/cognee_code_graph_demo.ipynb. The notebook loads the
default user's datasets, runs the code-graph task list over the
documents of "main_dataset" and renders the resulting graph.

The Graphistry password is read with getpass so it is not echoed into
the notebook.

Also included:
- NetworkXAdapter: test for "updated_at" on the edge being parsed
  instead of on `node` before converting the edge timestamp.
- .gitignore: ignore *.lance/ directories.
---
 .gitignore                                    |   1 +
 .../databases/graph/networkx/adapter.py       |   2 +-
 notebooks/cognee_code_graph_demo.ipynb        | 141 ++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 notebooks/cognee_code_graph_demo.ipynb

diff --git a/.gitignore b/.gitignore
index f447655cf..1c75b636c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 .prod.env
 
 cognee/.data/
+*.lance/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py
index 6c7abd498..65aeea289 100644
--- a/cognee/infrastructure/databases/graph/networkx/adapter.py
+++ b/cognee/infrastructure/databases/graph/networkx/adapter.py
@@ -270,7 +270,7 @@ class NetworkXAdapter(GraphDBInterface):
                         except:
                             pass
 
-                        if "updated_at" in node:
+                        if "updated_at" in edge:
                             edge["updated_at"] = datetime.strptime(edge["updated_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
 
                     self.graph = nx.readwrite.json_graph.node_link_graph(graph_data)
diff --git a/notebooks/cognee_code_graph_demo.ipynb b/notebooks/cognee_code_graph_demo.ipynb
new file mode 100644
index 000000000..5e21e9dad
--- /dev/null
+++ b/notebooks/cognee_code_graph_demo.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
+    "os.environ['GRAPHISTRY_PASSWORD'] = getpass(\"Please enter your graphistry password\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cognee.modules.users.methods import get_default_user\n",
+    "\n",
+    "from cognee.modules.data.methods import get_datasets\n",
+    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
+    "from cognee.modules.data.models import Data\n",
+    "\n",
+    "from cognee.modules.pipelines.tasks.Task import Task\n",
+    "from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
+    "from cognee.tasks.graph import extract_graph_from_code\n",
+    "from cognee.tasks.storage import add_data_points\n",
+    "from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
+    "\n",
+    "from cognee.modules.pipelines import run_tasks\n",
+    "\n",
+    "from cognee.shared.utils import render_graph\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user = await get_default_user()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "existing_datasets = await get_datasets(user.id)\n",
+    "\n",
+    "datasets = {}\n",
+    "for dataset in existing_datasets:\n",
+    "    dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
+    "    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
+    "    datasets[dataset_name] = data_documents\n",
+    "print(datasets.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tasks = [\n",
+    "    Task(classify_documents),\n",
+    "    Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
+    "    Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
+    "    Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
+    "    Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async def run_codegraph_pipeline(tasks, data_documents):\n",
+    "    \"\"\"Run tasks over data_documents and return every result the pipeline yields.\"\"\"\n",
+    "    pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
+    "    results = []\n",
+    "    async for result in pipeline:\n",
+    "        results.append(result)\n",
+    "    return results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await render_graph(None, include_nodes = True, include_labels = True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cognee",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}