cognee/notebooks/full_run.ipynb
2024-04-21 22:03:18 +02:00

183 lines
5.5 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "38135bf7",
"metadata": {},
"outputs": [],
"source": [
"# Configure cognee, call prune_system(), then add a small HotPotQA training\n",
"# sample to the \"train_dataset\" dataset.\n",
"from os import path\n",
"import cognee\n",
"import dspy\n",
"from cognee.modules.cognify.dataset import HotPotQA\n",
"\n",
"# Both roots are resolved relative to the kernel's working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_system()\n",
"\n",
"# The ColBERTv2 client is handed to dspy.configure as `rm`.\n",
"colbertv2_wiki17_abstracts = dspy.ColBERTv2(\n",
"    url=\"http://20.102.90.50:2017/wiki17_abstracts\",\n",
")\n",
"dspy.configure(rm=colbertv2_wiki17_abstracts)\n",
"\n",
"dataset = HotPotQA(\n",
"    train_seed=1,\n",
"    train_size=10,\n",
"    eval_seed=2023,\n",
"    dev_size=0,\n",
"    test_size=0,\n",
"    keep_details=True,\n",
")\n",
"\n",
"# One text per training case: the sentences of each context entry are joined\n",
"# with single spaces, and the entries are then joined with CRLF.\n",
"texts_to_add = [\n",
"    \"\\r\\n\".join(\n",
"        \" \".join(context_sentences)\n",
"        for context_sentences in train_case.get(\"context\")[\"sentences\"]\n",
"    )\n",
"    for train_case in dataset.train\n",
"]\n",
"\n",
"dataset_name = \"train_dataset\"\n",
"await cognee.add(texts_to_add, dataset_name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44603a2a",
"metadata": {},
"outputs": [],
"source": [
"# Print the list of datasets, then the length of the query result for\n",
"# \"train_dataset\".\n",
"from os import path\n",
"import cognee\n",
"\n",
"# Both roots are resolved relative to the kernel's working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"available_datasets = cognee.datasets.list_datasets()\n",
"print(available_datasets)\n",
"\n",
"train_dataset = cognee.datasets.query_data('train_dataset')\n",
"print(len(train_dataset))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65bfaf09",
"metadata": {},
"outputs": [],
"source": [
"# Call prune_system() and then run cognee.cognify on \"train_dataset\".\n",
"from os import path\n",
"import cognee\n",
"\n",
"# Configure the storage roots BEFORE pruning (same order as the first cell).\n",
"# Previously the prune ran first, so when this cell was executed on its own\n",
"# in a fresh kernel it would use whatever roots were current instead of\n",
"# ../.cognee_system.\n",
"# NOTE(review): assumes prune_system() honours system_root_directory - confirm.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_system()\n",
"\n",
"await cognee.cognify('train_dataset')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a514cf38",
"metadata": {},
"outputs": [],
"source": [
"# Register with Graphistry and render the graph held by the NetworkX client.\n",
"import networkx as nx\n",
"import pandas as pd\n",
"import graphistry\n",
"from cognee.config import Config\n",
"from cognee.utils import render_graph\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"\n",
"config = Config()\n",
"config.load()\n",
"\n",
"# Credentials are read from the loaded cognee config, not hard-coded here.\n",
"graphistry.register(\n",
"    api=3,\n",
"    username=config.graphistry_username,\n",
"    password=config.graphistry_password,\n",
")\n",
"\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX, \"cognee_graph.pkl\")\n",
"graph = graph_client.graph\n",
"await render_graph(graph)\n",
"\n",
"# Disabled manual Graphistry plotting, kept for reference. It is the only\n",
"# user of the `nx` and `pd` imports in this cell.\n",
"#\n",
"# edges = nx.to_pandas_edgelist(graph)\n",
"#\n",
"# nodes_data = [{\n",
"#     \"id\": node_id,\n",
"#     \"label\": node[\"name\"] if \"name\" in node else node_id,\n",
"# } for (node_id, node) in graph.nodes(data = True)]\n",
"#\n",
"# nodes = pd.DataFrame(nodes_data)\n",
"#\n",
"# plotter = graphistry.edges(edges, source = \"source\", destination = \"target\").nodes(nodes, \"id\")\n",
"#\n",
"# plotter.bind(edge_title = \"relationship_name\", edge_label = \"relationship_name\", point_title = \"name\", point_label = \"name\")\n",
"# url = plotter.plot(render = False, as_files = True)\n",
"# print(f\"Graph is visualized at: {url}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e916c484",
"metadata": {},
"outputs": [],
"source": [
"# Run search_similarity for a sample question; for every result, print\n",
"# whether it contains the name, followed by the result itself.\n",
"from os import path\n",
"import cognee\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"from cognee.modules.search.vector.search_similarity import search_similarity\n",
"\n",
"# Both roots are resolved relative to the kernel's working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"# NOTE(review): the visualization cell passes \"cognee_graph.pkl\" as a second\n",
"# argument; here it is omitted - confirm both resolve to the same graph.\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX)\n",
"graph = graph_client.graph\n",
"\n",
"results = await search_similarity(\"Who is Ernie Grunwald?\", graph)\n",
"\n",
"for result in results:\n",
"    contains_name = \"Ernie Grunwald\" in result\n",
"    print(contains_name)\n",
"    print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}