{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38135bf7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load a small HotPotQA training split and add its context passages to cognee as \"train_dataset\".\n",
    "from os import path\n",
    "import cognee\n",
    "import dspy\n",
    "from cognee.modules.cognify.dataset import HotPotQA\n",
    "\n",
    "# Resolve the data and system directories relative to the current working directory.\n",
    "data_directory_path = path.abspath(\"../.data\")\n",
    "cognee.config.data_root_directory(data_directory_path)\n",
    "\n",
    "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
    "cognee.config.system_root_directory(cognee_directory_path)\n",
    "\n",
    "# Reset cognee before ingesting.\n",
    "# NOTE(review): the exact scope of prune_system() is not visible from this notebook - confirm.\n",
    "await cognee.prune.prune_system()\n",
    "\n",
    "# Register the ColBERTv2 wiki17-abstracts endpoint as DSPy's retrieval model (rm).\n",
    "colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = \"http://20.102.90.50:2017/wiki17_abstracts\")\n",
    "dspy.configure(rm = colbertv2_wiki17_abstracts)\n",
    "\n",
    "# Request 10 training examples; the dev and test splits are left empty.\n",
    "dataset = HotPotQA(\n",
    "    train_seed = 1,\n",
    "    train_size = 10,\n",
    "    eval_seed = 2023,\n",
    "    dev_size = 0,\n",
    "    test_size = 0,\n",
    "    keep_details = True,\n",
    ")\n",
    "\n",
    "# Build one text per training case: the sentences of each context entry are joined with\n",
    "# spaces, and the entries are joined with CRLF.\n",
    "texts_to_add = [\n",
    "    \"\\r\\n\".join(\n",
    "        \" \".join(context_sentences)\n",
    "        for context_sentences in train_case.get(\"context\")[\"sentences\"]\n",
    "    )\n",
    "    for train_case in dataset.train\n",
    "]\n",
    "\n",
    "dataset_name = \"train_dataset\"\n",
    "await cognee.add(texts_to_add, dataset_name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44603a2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: list the known datasets and count the entries stored under \"train_dataset\".\n",
    "from os import path\n",
    "import cognee\n",
    "\n",
    "# Same directory configuration as the first cell, repeated so this cell can run on its own.\n",
    "data_directory_path = path.abspath(\"../.data\")\n",
    "cognee.config.data_root_directory(data_directory_path)\n",
    "\n",
    "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
    "cognee.config.system_root_directory(cognee_directory_path)\n",
    "\n",
    "print(cognee.datasets.list_datasets())\n",
    "\n",
    "train_dataset = cognee.datasets.query_data('train_dataset')\n",
    "print(len(train_dataset))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65bfaf09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run cognee.cognify on the dataset that the first cell added.\n",
    "from os import path\n",
    "import cognee\n",
    "\n",
    "# Configure the directories first so that prune_system() and cognify() work against the same\n",
    "# locations as the other cells, even when this cell is run on a fresh kernel.\n",
    "data_directory_path = path.abspath(\"../.data\")\n",
    "cognee.config.data_root_directory(data_directory_path)\n",
    "\n",
    "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
    "cognee.config.system_root_directory(cognee_directory_path)\n",
    "\n",
    "# Reset cognee's system state before processing.\n",
    "# NOTE(review): presumably this leaves the data added by cognee.add() in place - confirm.\n",
    "await cognee.prune.prune_system()\n",
    "\n",
    "await cognee.cognify('train_dataset')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a514cf38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the NetworkX graph held by cognee's graph client.\n",
    "# networkx and pandas are only needed by the disabled manual-plotting snippet at the bottom.\n",
    "import networkx as nx\n",
    "import pandas as pd\n",
    "import graphistry\n",
    "from cognee.config import Config\n",
    "from cognee.utils import render_graph\n",
    "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
    "\n",
    "# Graphistry credentials are read from the cognee configuration rather than hardcoded here.\n",
    "config = Config()\n",
    "config.load()\n",
    "\n",
    "graphistry.register(\n",
    "    api = 3,\n",
    "    username = config.graphistry_username,\n",
    "    password = config.graphistry_password\n",
    ")\n",
    "\n",
    "graph_client = await get_graph_client(GraphDBType.NETWORKX, \"cognee_graph.pkl\")\n",
    "graph = graph_client.graph\n",
    "\n",
    "await render_graph(graph)\n",
    "\n",
    "# Disabled: manual graphistry plotting of the same graph, kept for reference.\n",
    "# edges = nx.to_pandas_edgelist(graph)\n",
    "\n",
    "# nodes_data = [{\n",
    "#     \"id\": node_id,\n",
    "#     \"label\": node[\"name\"] if \"name\" in node else node_id,\n",
    "# } for (node_id, node) in graph.nodes(data = True)]\n",
    "\n",
    "# nodes = pd.DataFrame(nodes_data)\n",
    "\n",
    "# plotter = graphistry.edges(edges, source = \"source\", destination = \"target\").nodes(nodes, \"id\")\n",
    "\n",
    "# plotter.bind(edge_title = \"relationship_name\", edge_label = \"relationship_name\", point_title = \"name\", point_label = \"name\")\n",
    "# url = plotter.plot(render = False, as_files = True)\n",
    "# print(f\"Graph is visualized at: {url}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e916c484",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run a similarity search against the graph and inspect the results.\n",
    "from os import path\n",
    "import cognee\n",
    "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
    "from cognee.modules.search.vector.search_similarity import search_similarity\n",
    "\n",
    "# Same directory configuration as the first cell, repeated so this cell can run on its own.\n",
    "data_directory_path = path.abspath(\"../.data\")\n",
    "cognee.config.data_root_directory(data_directory_path)\n",
    "\n",
    "cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
    "cognee.config.system_root_directory(cognee_directory_path)\n",
    "\n",
    "graph_client = await get_graph_client(GraphDBType.NETWORKX)\n",
    "graph = graph_client.graph\n",
    "\n",
    "results = await search_similarity(\"Who is Ernie Grunwald?\", graph)\n",
    "\n",
    "# For each result, print whether it contains the queried name, then the result itself.\n",
    "for result in results:\n",
    "    print(\"Ernie Grunwald\" in result)\n",
    "    print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}