Merge pull request #191 from topoteretes/COG-555-codegen-notebook
Add code_graph_demo notebook
This commit is contained in:
commit
bcd397c0b0
3 changed files with 141 additions and 2 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -4,7 +4,8 @@
|
||||||
.prod.env
|
.prod.env
|
||||||
cognee/.data/
|
cognee/.data/
|
||||||
|
|
||||||
|
*.lance/
|
||||||
|
.DS_Store
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
|
||||||
|
|
@ -28,4 +28,4 @@ class OntologyEdge(BaseModel):
|
||||||
|
|
||||||
class GraphOntology(BaseModel):
|
class GraphOntology(BaseModel):
|
||||||
nodes: list[OntologyNode]
|
nodes: list[OntologyNode]
|
||||||
edges: list[OntologyEdge]
|
edges: list[OntologyEdge]
|
||||||
138
notebooks/cognee_code_graph_demo.ipynb
Normal file
138
notebooks/cognee_code_graph_demo.ipynb
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
|
||||||
|
"os.environ['GRAPHISTRY_PASSWORD'] = input(\"Please enter your graphistry password\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from cognee.modules.users.methods import get_default_user\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.data.methods import get_datasets\n",
|
||||||
|
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
|
||||||
|
"from cognee.modules.data.models import Data\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
||||||
|
"from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
|
||||||
|
"from cognee.tasks.graph import extract_graph_from_code\n",
|
||||||
|
"from cognee.tasks.storage import add_data_points\n",
|
||||||
|
"from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.pipelines import run_tasks\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.shared.utils import render_graph\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"user = await get_default_user()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"existing_datasets = await get_datasets(user.id)\n",
|
||||||
|
"\n",
|
||||||
|
"datasets = {}\n",
|
||||||
|
"for dataset in existing_datasets:\n",
|
||||||
|
" dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
|
||||||
|
" data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
|
||||||
|
" datasets[dataset_name] = data_documents\n",
|
||||||
|
"print(datasets.keys())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tasks = [\n",
|
||||||
|
" Task(classify_documents),\n",
|
||||||
|
" Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
|
||||||
|
" Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
|
||||||
|
" Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
|
||||||
|
" Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"async def run_codegraph_pipeline(tasks, data_documents):\n",
|
||||||
|
" pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
|
||||||
|
" results = []\n",
|
||||||
|
" async for result in pipeline:\n",
|
||||||
|
" results.append(result)\n",
|
||||||
|
" return(results)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"await render_graph(None, include_nodes = True, include_labels = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "cognee",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue