fix: notebooks (#818)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Daniel Molnar 2025-05-13 18:13:26 +02:00 committed by GitHub
parent e3121f5b1f
commit 91f3cd9ef7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 178 additions and 2243 deletions

View file

@ -467,7 +467,7 @@
"from cognee.modules.data.models import Dataset, Data\n", "from cognee.modules.data.models import Dataset, Data\n",
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n", "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
"from cognee.modules.cognify.config import get_cognify_config\n", "from cognee.modules.cognify.config import get_cognify_config\n",
"from cognee.modules.pipelines.tasks.Task import Task\n", "from cognee.modules.pipelines.tasks.task import Task\n",
"from cognee.modules.pipelines import run_tasks\n", "from cognee.modules.pipelines import run_tasks\n",
"from cognee.modules.users.models import User\n", "from cognee.modules.users.models import User\n",
"from cognee.tasks.documents import (\n", "from cognee.tasks.documents import (\n",
@ -505,7 +505,7 @@
" Task(add_data_points, task_config={\"batch_size\": 10}),\n", " Task(add_data_points, task_config={\"batch_size\": 10}),\n",
" ]\n", " ]\n",
"\n", "\n",
" pipeline_run = run_tasks(tasks, dataset.id, data_documents, \"cognify_pipeline\")\n", " pipeline_run = run_tasks(tasks, dataset.id, data_documents, user, \"cognify_pipeline\")\n",
" pipeline_run_status = None\n", " pipeline_run_status = None\n",
"\n", "\n",
" async for run_status in pipeline_run:\n", " async for run_status in pipeline_run:\n",
@ -529,8 +529,11 @@
"source": [ "source": [
"from cognee.modules.users.methods import get_default_user\n", "from cognee.modules.users.methods import get_default_user\n",
"from cognee.modules.data.methods import get_datasets_by_name\n", "from cognee.modules.data.methods import get_datasets_by_name\n",
"from cognee.modules.users.methods import get_user\n",
"\n", "\n",
"user = await get_default_user()\n", "default_user = await get_default_user()\n",
"\n",
"user = await get_user(default_user.id)\n",
"\n", "\n",
"datasets = await get_datasets_by_name([\"example\"], user.id)\n", "datasets = await get_datasets_by_name([\"example\"], user.id)\n",
"\n", "\n",
@ -604,39 +607,6 @@
"visualization_server(port=8002)" "visualization_server(port=8002)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "765bc42a143e98af",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-09T21:46:07.783693Z",
"start_time": "2025-02-09T21:46:07.780709Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1382358-433c-4cd0-8535-9e103f821034",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6332d5bc-882f-49d5-8496-582e3954567a",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import IFrame, display, HTML\n",
"\n",
"IFrame(\"http://127.0.0.1:8002/.artifacts/graph_visualization.html\", width=800, height=600)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -837,14 +807,6 @@
"### Give us a star if you like it!\n", "### Give us a star if you like it!\n",
"https://github.com/topoteretes/cognee" "https://github.com/topoteretes/cognee"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c081f2d53512199",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

View file

@ -15,7 +15,7 @@
} }
}, },
"source": [ "source": [
"First we import the necessary libaries" "First we import the necessary libraries"
] ]
}, },
{ {
@ -24,9 +24,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import asyncio\n",
"\n",
"import cognee\n", "import cognee\n",
"from cognee.shared.logging_utils import get_logger, ERROR\n", "from cognee.shared.logging_utils import get_logger, ERROR\n",
"import warnings\n",
"from cognee.modules.pipelines import Task, run_tasks\n", "from cognee.modules.pipelines import Task, run_tasks\n",
"from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n", "from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n",
"from cognee.infrastructure.databases.relational import (\n", "from cognee.infrastructure.databases.relational import (\n",
@ -38,7 +39,8 @@
"from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n", "from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n",
"from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n", "from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n",
"from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n", "from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n",
"from cognee.infrastructure.llm.get_llm_client import get_llm_client" "from cognee.infrastructure.llm.get_llm_client import get_llm_client\n",
"from cognee.modules.users.methods import get_default_user"
] ]
}, },
{ {
@ -126,33 +128,25 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"# 🔧 Setting Up Logging to Suppress Errors\n", "await cognee.prune.prune_data()\n",
"logger = get_logger(level=ERROR) # Keeping logs clean and focused\n",
"\n",
"# 🧹 Pruning Old Data and Metadata\n",
"await cognee.prune.prune_data() # Removing outdated data\n",
"await cognee.prune.prune_system(metadata=True)\n", "await cognee.prune.prune_system(metadata=True)\n",
"\n",
"# 🏗️ Creating Relational Database and Tables\n",
"await create_relational_db_and_tables()\n", "await create_relational_db_and_tables()\n",
"\n", "\n",
"# 📚 Adding Text Data to Cognee\n", "# Initialize default user\n",
"user = await get_default_user()\n",
"\n",
"for text in text_list:\n", "for text in text_list:\n",
" await cognee.add(text)\n", " await cognee.add(text)\n",
"\n", "\n",
"# 🕰️ Building Temporal-Aware Graphs\n",
"tasks = [\n", "tasks = [\n",
" Task(build_graph_with_temporal_awareness, text_list=text_list),\n", " Task(build_graph_with_temporal_awareness, text_list=text_list),\n",
"]\n", " ]\n",
"\n", "\n",
"# 🚀 Running the Task Pipeline\n", "pipeline = run_tasks(tasks, user=user)\n",
"pipeline = run_tasks(tasks)\n",
"\n", "\n",
"# 🌟 Processing Pipeline Results\n",
"async for result in pipeline:\n", "async for result in pipeline:\n",
" print(f\"✅ Result Processed: {result}\")\n", " print(result)\n",
"\n", "\n",
"# 🔄 Indexing and Transforming Graph Data\n",
"await index_and_transform_graphiti_nodes_and_edges()" "await index_and_transform_graphiti_nodes_and_edges()"
] ]
}, },

View file

@ -12,7 +12,9 @@
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": "%pip install llama-index-core\n" "source": [
"%pip install llama-index-core\n"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -116,10 +118,10 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"outputs": [],
"execution_count": null, "execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"from typing import Union, BinaryIO\n", "from typing import Union, BinaryIO\n",
"\n", "\n",

View file

@ -10,11 +10,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "982b897a29a26f7d", "id": "982b897a29a26f7d",
"metadata": {}, "metadata": {},
"source": "!pip install cognee==0.1.36",
"outputs": [], "outputs": [],
"execution_count": null "source": [
"!pip install cognee==0.1.39"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -28,15 +30,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "initial_id", "id": "initial_id",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"\n", "\n",
"os.environ[\"LLM_API_KEY\"] = \"\"" "os.environ[\"LLM_API_KEY\"] = \"\""
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -48,14 +50,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "5805c346f03d8070", "id": "5805c346f03d8070",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"current_directory = os.getcwd()\n", "current_directory = os.getcwd()\n",
"file_path = os.path.join(current_directory, \"data\", \"alice_in_wonderland.txt\")" "file_path = os.path.join(current_directory, \"data\", \"alice_in_wonderland.txt\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -67,15 +69,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "875763366723ee48", "id": "875763366723ee48",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"import cognee\n", "import cognee\n",
"await cognee.add(file_path)\n", "await cognee.add(file_path)\n",
"await cognee.cognify()" "await cognee.cognify()"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -87,33 +89,33 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "29b3a1e3279100d2", "id": "29b3a1e3279100d2",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"await cognee.search(\"List me all the influential characters in Alice in Wonderland.\")" "await cognee.search(\"List me all the influential characters in Alice in Wonderland.\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "883ce50d2d9dc584", "id": "883ce50d2d9dc584",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"await cognee.search(\"How did Alice end up in Wonderland?\")" "await cognee.search(\"How did Alice end up in Wonderland?\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "677e1bc52aa078b6", "id": "677e1bc52aa078b6",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"await cognee.search(\"Tell me about Alice's personality.\")" "await cognee.search(\"Tell me about Alice's personality.\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -125,8 +127,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"id": "6effdae590b795d3", "id": "6effdae590b795d3",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"import webbrowser\n", "import webbrowser\n",
"import os\n", "import os\n",
@ -136,9 +140,7 @@
"html_file = os.path.join(home_dir, \"graph_visualization.html\")\n", "html_file = os.path.join(home_dir, \"graph_visualization.html\")\n",
"display(html_file)\n", "display(html_file)\n",
"webbrowser.open(f\"file://{html_file}\")" "webbrowser.open(f\"file://{html_file}\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",

View file

@ -1,128 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v5.min.js"></script>
<style>
body, html { margin: 0; padding: 0; width: 100%; height: 100%; overflow: hidden; background: linear-gradient(90deg, #101010, #1a1a2e); color: white; font-family: 'Inter', sans-serif; }
svg { width: 100vw; height: 100vh; display: block; }
.links line { stroke: rgba(255, 255, 255, 0.4); stroke-width: 2px; }
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
</style>
</head>
<body>
<svg></svg>
<script>
var nodes = [];
var links = [];
var svg = d3.select("svg"),
width = window.innerWidth,
height = window.innerHeight;
var container = svg.append("g");
var simulation = d3.forceSimulation(nodes)
.force("link", d3.forceLink(links).id(d => d.id).strength(0.1))
.force("charge", d3.forceManyBody().strength(-275))
.force("center", d3.forceCenter(width / 2, height / 2))
.force("x", d3.forceX().strength(0.1).x(width / 2))
.force("y", d3.forceY().strength(0.1).y(height / 2));
var link = container.append("g")
.attr("class", "links")
.selectAll("line")
.data(links)
.enter().append("line")
.attr("stroke-width", 2);
var edgeLabels = container.append("g")
.attr("class", "edge-labels")
.selectAll("text")
.data(links)
.enter().append("text")
.attr("class", "edge-label")
.text(d => d.relation);
var nodeGroup = container.append("g")
.attr("class", "nodes")
.selectAll("g")
.data(nodes)
.enter().append("g");
var node = nodeGroup.append("circle")
.attr("r", 13)
.attr("fill", d => d.color)
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended));
nodeGroup.append("text")
.attr("class", "node-label")
.attr("dy", 4)
.attr("text-anchor", "middle")
.text(d => d.name);
node.append("title").text(d => JSON.stringify(d));
simulation.on("tick", function() {
link.attr("x1", d => d.source.x)
.attr("y1", d => d.source.y)
.attr("x2", d => d.target.x)
.attr("y2", d => d.target.y);
edgeLabels
.attr("x", d => (d.source.x + d.target.x) / 2)
.attr("y", d => (d.source.y + d.target.y) / 2 - 5);
node.attr("cx", d => d.x)
.attr("cy", d => d.y);
nodeGroup.select("text")
.attr("x", d => d.x)
.attr("y", d => d.y)
.attr("dy", 4)
.attr("text-anchor", "middle");
});
svg.call(d3.zoom().on("zoom", function() {
container.attr("transform", d3.event.transform);
}));
function dragstarted(d) {
if (!d3.event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x;
d.fy = d.y;
}
function dragged(d) {
d.fx = d3.event.x;
d.fy = d3.event.y;
}
function dragended(d) {
if (!d3.event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
}
window.addEventListener("resize", function() {
width = window.innerWidth;
height = window.innerHeight;
svg.attr("width", width).attr("height", height);
simulation.force("center", d3.forceCenter(width / 2, height / 2));
simulation.alpha(1).restart();
});
</script>
<svg style="position: fixed; bottom: 10px; right: 10px; width: 150px; height: auto; z-index: 9999;" viewBox="0 0 158 44" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M11.7496 4.92654C7.83308 4.92654 4.8585 7.94279 4.8585 11.3612V14.9304C4.8585 18.3488 7.83308 21.3651 11.7496 21.3651C13.6831 21.3651 15.0217 20.8121 16.9551 19.3543C18.0458 18.5499 19.5331 18.8013 20.3263 19.9072C21.1195 21.0132 20.8717 22.5213 19.781 23.3257C17.3518 25.0851 15.0217 26.2414 11.7 26.2414C5.35425 26.2414 0 21.2646 0 14.9304V11.3612C0 4.97681 5.35425 0.0502739 11.7 0.0502739C15.0217 0.0502739 17.3518 1.2065 19.781 2.96598C20.8717 3.77032 21.1195 5.27843 20.3263 6.38439C19.5331 7.49035 18.0458 7.69144 16.9551 6.93737C15.0217 5.52979 13.6831 4.92654 11.7496 4.92654ZM35.5463 4.92654C31.7289 4.92654 28.6552 8.04333 28.6552 11.8639V14.478C28.6552 18.2986 31.7289 21.4154 35.5463 21.4154C39.3141 21.4154 42.3878 18.2986 42.3878 14.478V11.8639C42.3878 8.04333 39.3141 4.92654 35.5463 4.92654ZM23.7967 11.8639C23.7967 5.32871 29.0518 0 35.5463 0C42.0408 0 47.2463 5.32871 47.2463 11.8639V14.478C47.2463 21.0132 42.0408 26.3419 35.5463 26.3419C29.0518 26.3419 23.7967 21.0635 23.7967 14.478V11.8639ZM63.3091 5.07736C59.4917 5.07736 56.418 8.19415 56.418 12.0147C56.418 15.8353 59.4917 18.9521 63.3091 18.9521C67.1265 18.9521 70.1506 15.8856 70.1506 12.0147C70.1506 8.14388 67.0769 5.07736 63.3091 5.07736ZM51.5595 11.9645C51.5595 5.42925 56.8146 0.150814 63.3091 0.150814C66.0854 0.150814 68.5642 1.10596 70.5968 2.71463L72.4311 0.904876C73.3731 -0.0502693 74.9099 -0.0502693 75.8519 0.904876C76.7938 1.86002 76.7938 3.41841 75.8519 4.37356L73.7201 6.53521C74.5629 8.19414 75.0587 10.0542 75.0587 12.0147C75.0587 18.4997 69.8532 23.8284 63.3587 23.8284C63.3091 23.8284 63.2099 23.8284 63.1603 23.8284H58.0044C57.1616 23.8284 56.4675 24.5322 56.4675 25.3868C56.4675 26.2414 57.1616 26.9452 58.0044 26.9452H64.6476H66.7794C68.5146 26.9452 70.3489 27.4479 71.7866 28.6041C73.2739 29.8106 74.2159 31.5701 74.4142 33.7317C74.7116 37.6026 72.0345 40.2166 69.8532 41.0713L63.8048 43.7859C62.5654 44.3389 61.1277 43.7859 60.6319 42.5291C60.0866 
41.2723 60.6319 39.8648 61.8714 39.3118L68.0188 36.5972C68.0684 36.5972 68.118 36.5469 68.1675 36.5469C68.4154 36.4463 68.8616 36.1447 69.2087 35.6923C69.5061 35.2398 69.7044 34.7371 69.6548 34.1339C69.6053 33.229 69.2582 32.7263 68.8616 32.4247C68.4154 32.0728 67.7214 31.8214 66.8786 31.8214H58.2027C58.1531 31.8214 58.1531 31.8214 58.1035 31.8214H58.054C54.534 31.8214 51.6586 28.956 51.6586 25.3868C51.6586 23.0743 52.8485 21.0635 54.6828 19.9072C52.6997 17.7959 51.5595 15.031 51.5595 11.9645ZM90.8736 5.07736C87.0562 5.07736 83.9824 8.19415 83.9824 12.0147V23.9289C83.9824 25.2862 82.8917 26.3922 81.5532 26.3922C80.2146 26.3922 79.1239 25.2862 79.1239 23.9289V11.9645C79.1239 5.42925 84.379 0.150814 90.824 0.150814C97.2689 0.150814 102.524 5.42925 102.524 11.9645V23.8786C102.524 25.2359 101.433 26.3419 100.095 26.3419C98.7562 26.3419 97.6655 25.2359 97.6655 23.8786V11.9645C97.7647 8.14387 94.6414 5.07736 90.8736 5.07736ZM119.43 5.07736C115.513 5.07736 112.39 8.24441 112.39 12.065V14.5785C112.39 18.4494 115.513 21.5662 119.43 21.5662C120.768 21.5662 122.057 21.164 123.098 20.5105C124.238 19.8067 125.726 20.1586 126.42 21.3148C127.114 22.4711 126.767 23.9792 125.627 24.683C123.842 25.7889 121.71 26.4425 119.43 26.4425C112.885 26.4425 107.581 21.1137 107.581 14.5785V12.065C107.581 5.47952 112.935 0.201088 119.43 0.201088C125.032 0.201088 129.692 4.07194 130.931 9.3001L131.427 11.3612L121.115 15.584C119.876 16.0867 118.488 15.4834 117.942 14.2266C117.447 12.9699 118.041 11.5623 119.281 11.0596L125.478 8.54604C124.238 6.43466 122.008 5.07736 119.43 5.07736ZM146.003 5.07736C142.086 5.07736 138.963 8.24441 138.963 12.065V14.5785C138.963 18.4494 142.086 21.5662 146.003 21.5662C147.341 21.5662 148.63 21.164 149.671 20.5105C150.217 20.1586 150.663 19.8067 151.109 19.304C152.001 18.2986 153.538 18.2483 154.53 19.2034C155.521 20.1083 155.571 21.6667 154.629 22.6721C153.935 23.4262 153.092 24.13 152.2 24.683C150.415 25.7889 148.283 26.4425 146.003 26.4425C139.458 26.4425 134.154 
21.1137 134.154 14.5785V12.065C134.154 5.47952 139.508 0.201088 146.003 0.201088C151.605 0.201088 156.265 4.07194 157.504 9.3001L158 11.3612L147.688 15.584C146.449 16.0867 145.061 15.4834 144.515 14.2266C144.019 12.9699 144.614 11.5623 145.854 11.0596L152.051 8.54604C150.762 6.43466 148.58 5.07736 146.003 5.07736Z" fill="white"/>
</svg>
</body>
</html>

View file

@ -1,8 +1,8 @@
{ {
"cells": [ "cells": [
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Cognee GraphRAG\n", "## Cognee GraphRAG\n",
"\n", "\n",
@ -48,15 +48,19 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"source": "!pip install cognee==0.1.24", "execution_count": null,
"metadata": {},
"outputs": [], "outputs": [],
"execution_count": null "source": [
"!pip install cognee==0.1.39"
]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import cognee\n", "import cognee\n",
@ -66,13 +70,11 @@
"\n", "\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n", "if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"" " os.environ[\"OPENAI_API_KEY\"] = \"\""
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Ensure youve set up your API keys and installed necessary dependencies.\n", "Ensure youve set up your API keys and installed necessary dependencies.\n",
"\n", "\n",
@ -82,19 +84,19 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"documents = [\"Jessica Miller, Experienced Sales Manager with a strong track record in building high-performing teams.\",\n", "documents = [\"Jessica Miller, Experienced Sales Manager with a strong track record in building high-performing teams.\",\n",
" \"David Thompson, Creative Graphic Designer with over 8 years of experience in visual design and branding.\"\n", " \"David Thompson, Creative Graphic Designer with over 8 years of experience in visual design and branding.\"\n",
" ]" " ]"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"### 3. Adding Data to Cognee\n", "### 3. Adding Data to Cognee\n",
"\n", "\n",
@ -102,15 +104,17 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"source": "await cognee.add(documents)", "execution_count": null,
"metadata": {},
"outputs": [], "outputs": [],
"execution_count": null "source": [
"await cognee.add(documents)"
]
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"This step prepares the data for graph-based processing.\n", "This step prepares the data for graph-based processing.\n",
"\n", "\n",
@ -120,15 +124,17 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"source": "await cognee.cognify()", "execution_count": null,
"metadata": {},
"outputs": [], "outputs": [],
"execution_count": null "source": [
"await cognee.cognify()"
]
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The graph now contains nodes and relationships derived from the dataset, creating a powerful structure for exploration.\n", "The graph now contains nodes and relationships derived from the dataset, creating a powerful structure for exploration.\n",
"\n", "\n",
@ -138,45 +144,49 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"from cognee.modules.search.types import SearchType\n", "from cognee.api.v1.search import SearchType\n",
"search_results = await cognee.search(SearchType.GRAPH_COMPLETION, \"Tell me who are the people mentioned?\")\n", "search_results = await cognee.search(query_type=SearchType.GRAPH_COMPLETION, query_text=\"Tell me who are the people mentioned?\")\n",
"\n", "\n",
"print(\"\\n\\nAnswer based on knowledge graph:\\n\")\n", "print(\"\\n\\nAnswer based on knowledge graph:\\n\")\n",
"for result in search_results:\n", "for result in search_results:\n",
" print(f\"{result}\\n\")" " print(f\"{result}\\n\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": "### Answer prompt based on RAG approach:" "metadata": {},
"source": [
"### Answer prompt based on RAG approach:"
]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"search_results = await cognee.search(SearchType.COMPLETION, \"Tell me who are the people mentioned?\")\n", "search_results = await cognee.search(query_type=SearchType.RAG_COMPLETION, query_text=\"Tell me who are the people mentioned?\")\n",
"\n", "\n",
"print(\"\\n\\nAnswer based on RAG:\\n\")\n", "print(\"\\n\\nAnswer based on RAG:\\n\")\n",
"for result in search_results:\n", "for result in search_results:\n",
" print(f\"{result}\\n\")" " print(f\"{result}\\n\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": "In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data." "metadata": {},
"source": [
"In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data."
]
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"### 7. Finding Related Nodes\n", "### 7. Finding Related Nodes\n",
"\n", "\n",
@ -184,21 +194,21 @@
] ]
}, },
{ {
"metadata": {},
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [ "source": [
"related_nodes = await cognee.search(SearchType.INSIGHTS, \"person\")\n", "related_nodes = await cognee.search(query_type=SearchType.INSIGHTS, query_text=\"person\")\n",
"\n", "\n",
"print(\"\\n\\nRelated nodes are:\\n\")\n", "print(\"\\n\\nRelated nodes are:\\n\")\n",
"for node in related_nodes:\n", "for node in related_nodes:\n",
" print(f\"{node}\\n\")" " print(f\"{node}\\n\")"
], ]
"outputs": [],
"execution_count": null
}, },
{ {
"metadata": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Why Choose Cognee?\n", "## Why Choose Cognee?\n",
"\n", "\n",
@ -233,9 +243,9 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"name": "python3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"display_name": "Python 3 (ipykernel)" "name": "python3"
} }
}, },
"nbformat": 4, "nbformat": 4,

View file

@ -1,978 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d35ac8ce-0f92-46f5-9ba4-a46970f0ce19",
"metadata": {},
"source": [
"# Cognee - Get Started"
]
},
{
"cell_type": "markdown",
"id": "074f0ea8-c659-4736-be26-be4b0e5ac665",
"metadata": {},
"source": [
"# Demo time"
]
},
{
"cell_type": "markdown",
"id": "0587d91d",
"metadata": {},
"source": [
"#### First let's define some data that we will cognify and perform a search on"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df16431d0f48b006",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:48.519686Z",
"start_time": "2024-09-20T14:02:48.515589Z"
}
},
"outputs": [],
"source": [
"job_position = \"\"\"Senior Data Scientist (Machine Learning)\n",
"\n",
"Company: TechNova Solutions\n",
"Location: San Francisco, CA\n",
"\n",
"Job Description:\n",
"\n",
"TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.\n",
"\n",
"Responsibilities:\n",
"\n",
"Develop and implement advanced machine learning algorithms and models.\n",
"Analyze large, complex datasets to extract meaningful patterns and insights.\n",
"Collaborate with cross-functional teams to integrate predictive models into products.\n",
"Stay updated with the latest advancements in machine learning and data science.\n",
"Mentor junior data scientists and provide technical guidance.\n",
"Qualifications:\n",
"\n",
"Master's or Ph.D. in Data Science, Computer Science, Statistics, or a related field.\n",
"5+ years of experience in data science and machine learning.\n",
"Proficient in Python, R, and SQL.\n",
"Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).\n",
"Strong problem-solving skills and attention to detail.\n",
"Candidate CVs\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9086abf3af077ab4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.120838Z",
"start_time": "2024-09-20T14:02:49.118294Z"
}
},
"outputs": [],
"source": [
"job_1 = \"\"\"\n",
"CV 1: Relevant\n",
"Name: Dr. Emily Carter\n",
"Contact Information:\n",
"\n",
"Email: emily.carter@example.com\n",
"Phone: (555) 123-4567\n",
"Summary:\n",
"\n",
"Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.\n",
"\n",
"Education:\n",
"\n",
"Ph.D. in Computer Science, Stanford University (2014)\n",
"B.S. in Mathematics, University of California, Berkeley (2010)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, InnovateAI Labs (2016 – Present)\n",
"Led a team in developing machine learning models for natural language processing applications.\n",
"Implemented deep learning algorithms that improved prediction accuracy by 25%.\n",
"Collaborated with cross-functional teams to integrate models into cloud-based platforms.\n",
"Data Scientist, DataWave Analytics (2014 – 2016)\n",
"Developed predictive models for customer segmentation and churn analysis.\n",
"Analyzed large datasets using Hadoop and Spark frameworks.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R, SQL\n",
"Machine Learning: TensorFlow, Keras, Scikit-Learn\n",
"Big Data Technologies: Hadoop, Spark\n",
"Data Visualization: Tableau, Matplotlib\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9de0cc07f798b7f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.675003Z",
"start_time": "2024-09-20T14:02:49.671615Z"
}
},
"outputs": [],
"source": [
"job_2 = \"\"\"\n",
"CV 2: Relevant\n",
"Name: Michael Rodriguez\n",
"Contact Information:\n",
"\n",
"Email: michael.rodriguez@example.com\n",
"Phone: (555) 234-5678\n",
"Summary:\n",
"\n",
"Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Data Science, Carnegie Mellon University (2013)\n",
"B.S. in Computer Science, University of Michigan (2011)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, Alpha Analytics (2017 – Present)\n",
"Developed machine learning models to optimize marketing strategies.\n",
"Reduced customer acquisition cost by 15% through predictive modeling.\n",
"Data Scientist, TechInsights (2013 – 2017)\n",
"Analyzed user behavior data to improve product features.\n",
"Implemented A/B testing frameworks to evaluate product changes.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, Java, SQL\n",
"Machine Learning: Scikit-Learn, XGBoost\n",
"Data Visualization: Seaborn, Plotly\n",
"Databases: MySQL, MongoDB\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "185ff1c102d06111",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.286828Z",
"start_time": "2024-09-20T14:02:50.284369Z"
}
},
"outputs": [],
"source": [
"job_3 = \"\"\"\n",
"CV 3: Relevant\n",
"Name: Sarah Nguyen\n",
"Contact Information:\n",
"\n",
"Email: sarah.nguyen@example.com\n",
"Phone: (555) 345-6789\n",
"Summary:\n",
"\n",
"Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Statistics, University of Washington (2014)\n",
"B.S. in Applied Mathematics, University of Texas at Austin (2012)\n",
"Experience:\n",
"\n",
"Data Scientist, QuantumTech (2016 – Present)\n",
"Designed and implemented machine learning algorithms for financial forecasting.\n",
"Improved model efficiency by 20% through algorithm optimization.\n",
"Junior Data Scientist, DataCore Solutions (2014 – 2016)\n",
"Assisted in developing predictive models for supply chain optimization.\n",
"Conducted data cleaning and preprocessing on large datasets.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R\n",
"Machine Learning Frameworks: PyTorch, Scikit-Learn\n",
"Statistical Analysis: SAS, SPSS\n",
"Cloud Platforms: AWS, Azure\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d55ce4c58f8efb67",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.950343Z",
"start_time": "2024-09-20T14:02:50.946378Z"
}
},
"outputs": [],
"source": [
"job_4 = \"\"\"\n",
"CV 4: Not Relevant\n",
"Name: David Thompson\n",
"Contact Information:\n",
"\n",
"Email: david.thompson@example.com\n",
"Phone: (555) 456-7890\n",
"Summary:\n",
"\n",
"Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.\n",
"\n",
"Education:\n",
"\n",
"B.F.A. in Graphic Design, Rhode Island School of Design (2012)\n",
"Experience:\n",
"\n",
"Senior Graphic Designer, CreativeWorks Agency (2015 – Present)\n",
"Led design projects for clients in various industries.\n",
"Created branding materials that increased client engagement by 30%.\n",
"Graphic Designer, Visual Innovations (2012 – 2015)\n",
"Designed marketing collateral, including brochures, logos, and websites.\n",
"Collaborated with the marketing team to develop cohesive brand strategies.\n",
"Skills:\n",
"\n",
"Design Software: Adobe Photoshop, Illustrator, InDesign\n",
"Web Design: HTML, CSS\n",
"Specialties: Branding and Identity, Typography\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ca4ecc32721ad332",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:51.548191Z",
"start_time": "2024-09-20T14:02:51.545520Z"
}
},
"outputs": [],
"source": [
"job_5 = \"\"\"\n",
"CV 5: Not Relevant\n",
"Name: Jessica Miller\n",
"Contact Information:\n",
"\n",
"Email: jessica.miller@example.com\n",
"Phone: (555) 567-8901\n",
"Summary:\n",
"\n",
"Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.\n",
"\n",
"Education:\n",
"\n",
"B.A. in Business Administration, University of Southern California (2010)\n",
"Experience:\n",
"\n",
"Sales Manager, Global Enterprises (2015 – Present)\n",
"Managed a sales team of 15 members, achieving a 20% increase in annual revenue.\n",
"Developed sales strategies that expanded customer base by 25%.\n",
"Sales Representative, Market Leaders Inc. (2010 – 2015)\n",
"Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.\n",
"Skills:\n",
"\n",
"Sales Strategy and Planning\n",
"Team Leadership and Development\n",
"CRM Software: Salesforce, Zoho\n",
"Negotiation and Relationship Building\n",
"\"\"\""
]
},
{
"cell_type": "markdown",
"id": "4415446a",
"metadata": {},
"source": [
"#### Please add the necessary environment information below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bce39dc6",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Setting environment variables\n",
"if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"\n",
"if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"# \"neo4j\" or \"networkx\"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
"# Not needed if using networkx\n",
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
"\n",
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
"\n",
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
"\n",
"# Database name\n",
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
"\n",
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f1a1dbd",
"metadata": {},
"outputs": [],
"source": [
"# Reset the cognee system with the following command:\n",
"\n",
"import cognee\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)"
]
},
{
"cell_type": "markdown",
"id": "383d6971",
"metadata": {},
"source": [
"#### After we have defined and gathered our data let's add it to cognee "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "904df61ba484a8e5",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:54.243987Z",
"start_time": "2024-09-20T14:02:52.498195Z"
}
},
"outputs": [],
"source": [
"import cognee\n",
"\n",
"await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], \"example\")"
]
},
{
"cell_type": "markdown",
"id": "0f15c5b1",
"metadata": {},
"source": [
"#### All good, let's cognify it."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c431fdef4921ae0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:57.925667Z",
"start_time": "2024-09-20T14:02:57.922353Z"
}
},
"outputs": [],
"source": [
"from cognee.shared.data_models import KnowledgeGraph\n",
"from cognee.modules.data.models import Dataset, Data\n",
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
"from cognee.modules.cognify.config import get_cognify_config\n",
"from cognee.modules.pipelines.tasks.task import Task\n",
"from cognee.modules.pipelines import run_tasks\n",
"from cognee.modules.users.models import User\n",
"from cognee.tasks.documents import (\n",
" check_permissions_on_documents,\n",
" classify_documents,\n",
" extract_chunks_from_documents,\n",
")\n",
"from cognee.tasks.graph import extract_graph_from_data\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.tasks.summarization import summarize_text\n",
"\n",
"\n",
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
" data_documents: list[Data] = await get_dataset_data(dataset_id=dataset.id)\n",
"\n",
" try:\n",
" cognee_config = get_cognify_config()\n",
"\n",
" tasks = [\n",
" Task(classify_documents),\n",
" Task(check_permissions_on_documents, user=user, permissions=[\"write\"]),\n",
" Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
" Task(\n",
" extract_graph_from_data, graph_model=KnowledgeGraph,\n",
" task_config={\"batch_size\": 10}\n",
" ), # Generate knowledge graphs from the document chunks.\n",
" Task(\n",
" summarize_text,\n",
" summarization_model=cognee_config.summarization_model,\n",
" task_config={\"batch_size\": 10},\n",
" ),\n",
" Task(add_data_points, task_config={\"batch_size\": 10}),\n",
" ]\n",
"\n",
"        pipeline = run_tasks(tasks, dataset.id, data_documents, user, \"cognify_pipeline\")\n",
"\n",
" async for result in pipeline:\n",
" print(result)\n",
" except Exception as error:\n",
" raise error"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0a91b99c6215e09",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:58.905774Z",
"start_time": "2024-09-20T14:02:58.625915Z"
}
},
"outputs": [],
"source": [
"from cognee.modules.users.methods import get_default_user\n",
"from cognee.modules.data.methods import get_datasets_by_name\n",
"\n",
"user = await get_default_user()\n",
"\n",
"datasets = await get_datasets_by_name([\"example\"], user.id)\n",
"\n",
"await run_cognify_pipeline(datasets[0], user)"
]
},
{
"cell_type": "markdown",
"id": "219a6d41",
"metadata": {},
"source": [
"#### We get the URL to the graph on Graphistry in the notebook cell below, showing nodes and connections made by the cognify process."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "080389e5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from cognee.shared.utils import render_graph\n",
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
"import graphistry\n",
"\n",
"graphistry.login(\n",
" username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
")\n",
"\n",
"graph_engine = await get_graph_engine()\n",
"\n",
"graph_url = await render_graph(graph_engine.graph)\n",
"print(graph_url)"
]
},
{
"cell_type": "markdown",
"id": "59e6c3c3",
"metadata": {},
"source": [
"#### We can also do a search on the data to explore the knowledge."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5e7dfc8",
"metadata": {},
"outputs": [],
"source": [
"async def search(\n",
" vector_engine,\n",
" collection_name: str,\n",
" query_text: str = None,\n",
"):\n",
" query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]\n",
"\n",
" connection = await vector_engine.get_connection()\n",
" collection = await connection.open_table(collection_name)\n",
"\n",
" results = await collection.vector_search(query_vector).limit(10).to_pandas()\n",
"\n",
" result_values = list(results.to_dict(\"index\").values())\n",
"\n",
" return [\n",
" dict(\n",
" id=str(result[\"id\"]),\n",
" payload=result[\"payload\"],\n",
" score=result[\"_distance\"],\n",
" )\n",
" for result in result_values\n",
" ]\n",
"\n",
"\n",
"from cognee.infrastructure.databases.vector import get_vector_engine\n",
"\n",
"vector_engine = get_vector_engine()\n",
"results = await search(vector_engine, \"Entity_name\", \"sarah.nguyen@example.com\")\n",
"for result in results:\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"id": "81fa2b00",
"metadata": {},
"source": [
"#### We normalize search output scores so the lower the score of the search result is the higher the chance that it's what you're looking for. In the example above we have searched for node entities in the knowledge graph related to \"sarah.nguyen@example.com\""
]
},
{
"cell_type": "markdown",
"id": "1b94ff96",
"metadata": {},
"source": [
"#### In the example below we'll use cognee search to summarize information regarding the node most related to \"sarah.nguyen@example.com\" in the knowledge graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21a3e9a6",
"metadata": {},
"outputs": [],
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
"node = (await vector_engine.search(\"Entity_name\", \"sarah.nguyen@example.com\"))[0]\n",
"node_name = node.payload[\"text\"]\n",
"\n",
"search_results = await cognee.search(query_type=SearchType.SUMMARIES, query_text=node_name)\n",
"print(\"\\n\\nExtracted summaries are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "fd6e5fe2",
"metadata": {},
"source": [
"#### In this example we'll use cognee search to find chunks in which the node most related to \"sarah.nguyen@example.com\" is a part of"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7a8abff",
"metadata": {},
"outputs": [],
"source": [
"search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=node_name)\n",
"print(\"\\n\\nExtracted chunks are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "47f0112f",
"metadata": {},
"source": [
"#### In this example we'll use cognee search to give us insights from the knowledge graph related to the node most related to \"sarah.nguyen@example.com\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "706a3954",
"metadata": {},
"outputs": [],
"source": [
"search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=node_name)\n",
"print(\"\\n\\nExtracted sentences are:\\n\")\n",
"for result in search_results:\n",
" print(f\"{result}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "e519e30c0423c2a",
"metadata": {},
"source": [
"## Let's add evals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3845443e",
"metadata": {},
"outputs": [],
"source": [
"!pip install \"cognee[deepeval]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2c3c70",
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import deepeval_answers, answer_qa_instance\n",
"from evals.qa_dataset_utils import load_qa_dataset\n",
"from evals.qa_metrics_utils import get_metrics\n",
"from evals.qa_context_provider_utils import qa_context_providers\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import statistics\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a609d8",
"metadata": {},
"outputs": [],
"source": [
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"dataset_name_or_filename = \"hotpotqa\"\n",
"dataset = load_qa_dataset(dataset_name_or_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7351ab8f",
"metadata": {},
"outputs": [],
"source": [
"context_provider_name = \"cognee\"\n",
"context_provider = qa_context_providers[context_provider_name]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9346115b",
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)\n",
"instances = dataset if not num_samples else random.sample(dataset, num_samples)\n",
"\n",
"out_path = \"out\"\n",
"if not Path(out_path).exists():\n",
" Path(out_path).mkdir()\n",
"contexts_filename = out_path / Path(\n",
" f\"contexts_{dataset_name_or_filename.split('.')[0]}_{context_provider_name}.json\"\n",
")\n",
"\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = await answer_qa_instance(instance, context_provider, contexts_filename)\n",
" answers.append(answer)"
]
},
{
"cell_type": "markdown",
"id": "1e7d872d",
"metadata": {},
"source": [
"#### Define Metrics for Evaluation and Calculate Score\n",
"**Options**: \n",
"- **Correctness**: Is the actual output factually correct based on the expected output?\n",
"- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?\n",
"- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?\n",
"- **Empowerment**: How well does the answer help the reader understand and make informed judgements about the topic?\n",
"- **Directness**: How specifically and clearly does the answer address the question?\n",
"- **F1 Score**: the harmonic mean of the precision and recall, using word-level Exact Match\n",
"- **EM Score**: the rate at which the predicted strings exactly match their references, ignoring white spaces and capitalization."
]
},
{
"cell_type": "markdown",
"id": "c81e2b46",
"metadata": {},
"source": [
"##### Calculate `\"Correctness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae728344",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Correctness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "764aac6d",
"metadata": {},
"outputs": [],
"source": [
"Correctness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Correctness)"
]
},
{
"cell_type": "markdown",
"id": "6d3bbdc5",
"metadata": {},
"source": [
"##### Calculating `\"Comprehensiveness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9793ef78",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Comprehensiveness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9add448a",
"metadata": {},
"outputs": [],
"source": [
"Comprehensiveness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Comprehensiveness)"
]
},
{
"cell_type": "markdown",
"id": "bce2fa25",
"metadata": {},
"source": [
"##### Calculating `\"Diversity\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f60a179e",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Diversity\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ccbd0ab",
"metadata": {},
"outputs": [],
"source": [
"Diversity = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(Diversity)"
]
},
{
"cell_type": "markdown",
"id": "191cab63",
"metadata": {},
"source": [
"##### Calculating`\"Empowerment\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66bec0bf",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Empowerment\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b043a8f",
"metadata": {},
"outputs": [],
"source": [
"Empowerment = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Empowerment)"
]
},
{
"cell_type": "markdown",
"id": "2cac3be9",
"metadata": {},
"source": [
"##### Calculating `\"Directness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adaa17c0",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Directness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a8f97c9",
"metadata": {},
"outputs": [],
"source": [
"Directness = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(Directness)"
]
},
{
"cell_type": "markdown",
"id": "1ad6feb8",
"metadata": {},
"source": [
"##### Calculating `\"F1\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdc48259",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"F1\"]\n",
"eval_metrics = get_metrics(metric_name_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c43c17c8",
"metadata": {},
"outputs": [],
"source": [
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bfcc46d",
"metadata": {},
"outputs": [],
"source": [
"F1_score = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(F1_score)"
]
},
{
"cell_type": "markdown",
"id": "2583f948",
"metadata": {},
"source": [
"##### Calculating `\"EM\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90a8f630",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"EM\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d1b1ea1",
"metadata": {},
"outputs": [],
"source": [
"EM = statistics.mean([result.metrics_data[0].score for result in eval_results.test_results])\n",
"print(EM)"
]
},
{
"cell_type": "markdown",
"id": "288ab570",
"metadata": {},
"source": [
"# Give us a star if you like it!\n",
"https://github.com/topoteretes/cognee"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cognee-c83GrcRT-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -3,7 +3,9 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EpokQ8Y_5jIJ7HdixZms81Oqgh2sp7-E?usp=sharing)" "source": [
"[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EpokQ8Y_5jIJ7HdixZms81Oqgh2sp7-E?usp=sharing)"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -57,7 +59,9 @@
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": "!pip install llama-index-graph-rag-cognee==0.1.3" "source": [
"!pip install llama-index-graph-rag-cognee==0.1.3"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -192,7 +196,9 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": "### Answer prompt based on RAG approach:" "source": [
"### Answer prompt based on RAG approach:"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -210,7 +216,9 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": "In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data." "source": [
"In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data."
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -271,7 +279,8 @@
], ],
"metadata": { "metadata": {
"language_info": { "language_info": {
"name": "python" "name": "python",
"version": "3.12.9"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because one or more lines are too long