cognee/notebooks/full_run.ipynb
2024-05-17 13:42:14 +02:00

192 lines
5.4 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "38135bf7",
"metadata": {},
"outputs": [],
"source": [
"from os import path\n",
"import cognee\n",
"import dspy\n",
"from cognee.modules.cognify.dataset import HotPotQA\n",
"\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_system()\n",
"\n",
"colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = \"http://20.102.90.50:2017/wiki17_abstracts\")\n",
"dspy.configure(rm = colbertv2_wiki17_abstracts)\n",
"\n",
"# dataset = HotPotQA(\n",
"# train_seed = 1,\n",
"# train_size = 10,\n",
"# eval_seed = 2023,\n",
"# dev_size = 0,\n",
"# test_size = 0,\n",
"# keep_details = True,\n",
"# )\n",
"\n",
"# texts_to_add = []\n",
"\n",
"# for train_case in dataset.train:\n",
"# train_case_text = \"\\r\\n\".join(\" \".join(context_sentences) for context_sentences in train_case.get(\"context\")[\"sentences\"])\n",
"\n",
"# texts_to_add.append(train_case_text)\n",
"\n",
"dataset_name = \"short_stories\"\n",
"await cognee.add(\"data://\" + data_directory_path, dataset_name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44603a2a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-30T16:53:16.917678Z",
"start_time": "2024-04-30T16:53:14.700232Z"
}
},
"outputs": [],
"source": [
"from os import path\n",
"import cognee\n",
"\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"print(cognee.datasets.list_datasets())\n",
"\n",
"train_dataset = cognee.datasets.list_data(\"short_stories\")\n",
"print(len(train_dataset))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65bfaf09",
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-30T16:55:30.886217Z",
"start_time": "2024-04-30T16:53:19.164943Z"
}
},
"outputs": [],
"source": [
"from os import path\n",
"import logging\n",
"import cognee\n",
"\n",
"logging.basicConfig(level = logging.INFO)\n",
"\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"await cognee.prune.prune_system()\n",
"\n",
"await cognee.cognify(\"short_stories\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a514cf38",
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-30T16:55:39.819971Z",
"start_time": "2024-04-30T16:55:35.627964Z"
}
},
"outputs": [],
"source": [
"import graphistry\n",
"from cognee.config import Config\n",
"from cognee.utils import render_graph\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"\n",
"config = Config()\n",
"config.load()\n",
"\n",
"graphistry.register(\n",
" api = 3,\n",
" username = config.graphistry_username,\n",
" password = config.graphistry_password\n",
")\n",
"\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX, \"cognee_graph.pkl\")\n",
"graph = graph_client.graph\n",
"\n",
"await render_graph(graph)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e916c484",
"metadata": {},
"outputs": [],
"source": [
"from os import path\n",
"import cognee\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"from cognee.modules.search.vector.search_similarity import search_similarity\n",
"\n",
"data_directory_path = path.abspath(\"../.data\")\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"\n",
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX)\n",
"graph = graph_client.graph\n",
"\n",
"results = await search_similarity(\"Who are French girls?\", graph)\n",
"\n",
"for result in results:\n",
" print(\"French girls\" in result)\n",
" print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5e44018878d383f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}