{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Code graph pipeline demo\n",
    "\n",
    "Runs the `code_graph_pipeline` tasks over the documents of an existing dataset and then calls `render_graph`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Graphistry credentials\n",
    "\n",
    "The credentials are stored in environment variables for this kernel session only. The password is read with `getpass` so it is not echoed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
    "# getpass instead of input: the password must not be echoed on screen.\n",
    "os.environ['GRAPHISTRY_PASSWORD'] = getpass.getpass(\"Please enter your graphistry password\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee.modules.users.methods import get_default_user\n",
    "\n",
    "from cognee.modules.data.methods import get_datasets\n",
    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
    "from cognee.modules.data.models import Data\n",
    "\n",
    "from cognee.modules.pipelines.tasks.Task import Task\n",
    "from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
    "from cognee.tasks.graph import extract_graph_from_code\n",
    "from cognee.tasks.storage import add_data_points\n",
    "from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
    "\n",
    "from cognee.modules.pipelines import run_tasks\n",
    "\n",
    "from cognee.shared.utils import render_graph\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the user's datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "user = await get_default_user()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "existing_datasets = await get_datasets(user.id)\n",
    "\n",
    "# Map each dataset (keyed by a normalised name) to its documents.\n",
    "datasets = {}\n",
    "for dataset in existing_datasets:\n",
    "    # Dots and spaces in the dataset name are replaced with underscores.\n",
    "    dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
    "    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
    "    datasets[dataset_name] = data_documents\n",
    "\n",
    "list(datasets)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define and run the pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tasks = [\n",
    "    Task(classify_documents),\n",
    "    Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
    "    # Extract text chunks based on the document type.\n",
    "    Task(extract_chunks_from_documents),\n",
    "    Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
    "    # Generate knowledge graphs from the document chunks.\n",
    "    Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "async def run_codegraph_pipeline(tasks, data_documents):\n",
    "    \"\"\"Run `tasks` over `data_documents` and return every yielded result as a list.\n",
    "\n",
    "    The tasks are executed through `run_tasks` under the pipeline name\n",
    "    \"code_graph_pipeline\"; results are collected in the order they are yielded.\n",
    "    \"\"\"\n",
    "    pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
    "    return [result async for result in pipeline]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATASET_NAME = \"main_dataset\"\n",
    "\n",
    "# Fail with an actionable message instead of a bare KeyError when the dataset is missing.\n",
    "if DATASET_NAME not in datasets:\n",
    "    raise KeyError(f\"Dataset {DATASET_NAME!r} not found; available datasets: {sorted(datasets)}\")\n",
    "\n",
    "results = await run_codegraph_pipeline(tasks, datasets[DATASET_NAME])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Render the graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "await render_graph(None, include_nodes = True, include_labels = True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cognee",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}