183 lines
5.5 KiB
Text
183 lines
5.5 KiB
Text
{
|
|
"cells": [
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "38135bf7",
 "metadata": {},
 "outputs": [],
 "source": [
  "from os import path\n",
  "import cognee\n",
  "import dspy\n",
  "from cognee.modules.cognify.dataset import HotPotQA\n",
  "\n",
  "# Resolve both storage roots relative to the current working directory and\n",
  "# register them with cognee before pruning or adding anything.\n",
  "data_directory_path = path.abspath(\"../.data\")\n",
  "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
  "cognee.config.data_root_directory(data_directory_path)\n",
  "cognee.config.system_root_directory(cognee_directory_path)\n",
  "\n",
  "await cognee.prune.prune_system()\n",
  "\n",
  "# Register a ColBERTv2 client for the wiki17_abstracts endpoint as DSPy's `rm`.\n",
  "COLBERT_WIKI17_URL = \"http://20.102.90.50:2017/wiki17_abstracts\"\n",
  "colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = COLBERT_WIKI17_URL)\n",
  "dspy.configure(rm = colbertv2_wiki17_abstracts)\n",
  "\n",
  "# Only the train split is requested: dev_size and test_size are both 0.\n",
  "hotpotqa_options = {\n",
  "    \"train_seed\": 1,\n",
  "    \"train_size\": 10,\n",
  "    \"eval_seed\": 2023,\n",
  "    \"dev_size\": 0,\n",
  "    \"test_size\": 0,\n",
  "    \"keep_details\": True,\n",
  "}\n",
  "dataset = HotPotQA(**hotpotqa_options)\n",
  "\n",
  "# One text per training case: the strings inside each `sentences` entry are\n",
  "# joined with spaces, and the resulting entries are joined with CRLF.\n",
  "texts_to_add = [\n",
  "    \"\\r\\n\".join(\" \".join(sentences) for sentences in case.get(\"context\")[\"sentences\"])\n",
  "    for case in dataset.train\n",
  "]\n",
  "\n",
  "dataset_name = \"train_dataset\"\n",
  "await cognee.add(texts_to_add, dataset_name)\n"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "44603a2a",
 "metadata": {},
 "outputs": [],
 "source": [
  "from os import path\n",
  "import cognee\n",
  "\n",
  "# Point cognee at the same storage roots the other cells configure.\n",
  "data_directory_path = path.abspath(\"../.data\")\n",
  "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
  "cognee.config.data_root_directory(data_directory_path)\n",
  "cognee.config.system_root_directory(cognee_directory_path)\n",
  "\n",
  "# Print whatever `list_datasets()` reports.\n",
  "available_datasets = cognee.datasets.list_datasets()\n",
  "print(available_datasets)\n",
  "\n",
  "# Print the number of items returned for the 'train_dataset' query.\n",
  "train_dataset = cognee.datasets.query_data('train_dataset')\n",
  "train_dataset_size = len(train_dataset)\n",
  "print(train_dataset_size)"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "65bfaf09",
 "metadata": {},
 "outputs": [],
 "source": [
  "from os import path\n",
  "import cognee\n",
  "\n",
  "# Configure the storage roots before pruning, in the same order as the\n",
  "# dataset-loading cell, so the prune runs against these directories even\n",
  "# when this cell is the first one executed on a fresh kernel.\n",
  "data_directory_path = path.abspath(\"../.data\")\n",
  "cognee.config.data_root_directory(data_directory_path)\n",
  "\n",
  "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
  "cognee.config.system_root_directory(cognee_directory_path)\n",
  "\n",
  "# NOTE(review): assumes prune_system() resolves its targets from the\n",
  "# configuration above at call time; confirm against the cognee version in use.\n",
  "await cognee.prune.prune_system()\n",
  "\n",
  "# Run cognify on the dataset name that the dataset-loading cell passes to cognee.add.\n",
  "await cognee.cognify('train_dataset')"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "a514cf38",
 "metadata": {},
 "outputs": [],
 "source": [
  "import networkx as nx\n",
  "import pandas as pd\n",
  "import graphistry\n",
  "from cognee.config import Config\n",
  "from cognee.utils import render_graph\n",
  "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
  "\n",
  "# Load the cognee configuration; the Graphistry credentials are read from it.\n",
  "config = Config()\n",
  "config.load()\n",
  "\n",
  "graphistry_credentials = {\n",
  "    \"api\": 3,\n",
  "    \"username\": config.graphistry_username,\n",
  "    \"password\": config.graphistry_password,\n",
  "}\n",
  "graphistry.register(**graphistry_credentials)\n",
  "\n",
  "# Fetch the NetworkX-backed graph client and render the graph it exposes.\n",
  "graph_client = await get_graph_client(GraphDBType.NETWORKX, \"cognee_graph.pkl\")\n",
  "graph = graph_client.graph\n",
  "\n",
  "await render_graph(graph)\n",
  "\n",
  "# Disabled alternative to render_graph: build the Graphistry plot by hand.\n",
  "# The `nx` and `pd` imports above are only referenced by this disabled code.\n",
  "# edges = nx.to_pandas_edgelist(graph)\n",
  "\n",
  "# nodes_data = [{\n",
  "#   \"id\": node_id,\n",
  "#   \"label\": node[\"name\"] if \"name\" in node else node_id,\n",
  "# } for (node_id, node) in graph.nodes(data = True)]\n",
  "\n",
  "# nodes = pd.DataFrame(nodes_data)\n",
  "\n",
  "# plotter = graphistry.edges(edges, source = \"source\", destination = \"target\").nodes(nodes, \"id\")\n",
  "\n",
  "# plotter.bind(edge_title = \"relationship_name\", edge_label = \"relationship_name\", point_title = \"name\", point_label = \"name\")\n",
  "# url = plotter.plot(render = False, as_files = True)\n",
  "# print(f\"Graph is visualized at: {url}\")\n"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "e916c484",
 "metadata": {},
 "outputs": [],
 "source": [
  "from os import path\n",
  "import cognee\n",
  "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
  "from cognee.modules.search.vector.search_similarity import search_similarity\n",
  "\n",
  "# Point cognee at the same storage roots the other cells configure.\n",
  "data_directory_path = path.abspath(\"../.data\")\n",
  "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
  "cognee.config.data_root_directory(data_directory_path)\n",
  "cognee.config.system_root_directory(cognee_directory_path)\n",
  "\n",
  "graph_client = await get_graph_client(GraphDBType.NETWORKX)\n",
  "graph = graph_client.graph\n",
  "\n",
  "# Run a similarity search over the graph for a question about a named person.\n",
  "query_text = \"Who is Ernie Grunwald?\"\n",
  "expected_mention = \"Ernie Grunwald\"\n",
  "results = await search_similarity(query_text, graph)\n",
  "\n",
  "# For each result, print whether it contains the name, then the result itself.\n",
  "for result in results:\n",
  "    mentions_subject = expected_mention in result\n",
  "    print(mentions_subject)\n",
  "    print(result)"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|