* feat: integrate lancedb * fix: use futures in weaviate adapter to enable async behaviour
192 lines
5.4 KiB
Text
192 lines
5.4 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "38135bf7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Setup cell: configure cognee's data/system directories, reset cognee, configure dspy's\n",
"# retriever, and register the data directory as the short_stories dataset.\n",
"# NOTE: this cell uses top-level await, so it needs a kernel with autoawait (e.g. ipykernel).\n",
"from os import path\n",
|
|
"import cognee\n",
|
|
"import dspy\n",
|
|
"# NOTE(review): HotPotQA is only referenced by the commented-out block further down.\n",
"from cognee.modules.cognify.dataset import HotPotQA\n",
|
|
"\n",
|
|
"# Both paths are relative, so abspath() resolves them against the kernel's current working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
|
|
"cognee.config.data_root_directory(data_directory_path)\n",
|
|
"\n",
|
|
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
|
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
|
"\n",
|
|
"# Presumably clears previously stored cognee system state - TODO confirm against cognee.prune.\n",
"await cognee.prune.prune_system()\n",
|
|
"\n",
|
|
"# ColBERTv2 client pointed at a remote wiki17_abstracts endpoint (needs network access to this host),\n",
"# registered as dspy's retrieval model (rm). No other visible cell calls dspy directly.\n",
"colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = \"http://20.102.90.50:2017/wiki17_abstracts\")\n",
|
|
"dspy.configure(rm = colbertv2_wiki17_abstracts)\n",
|
|
"\n",
|
|
"# Disabled alternative: build texts_to_add from HotPotQA training cases instead of local files.\n",
"# dataset = HotPotQA(\n",
|
|
"# train_seed = 1,\n",
|
|
"# train_size = 10,\n",
|
|
"# eval_seed = 2023,\n",
|
|
"# dev_size = 0,\n",
|
|
"# test_size = 0,\n",
|
|
"# keep_details = True,\n",
|
|
"# )\n",
|
|
"\n",
|
|
"# texts_to_add = []\n",
|
|
"\n",
|
|
"# for train_case in dataset.train:\n",
|
|
"# train_case_text = \"\\r\\n\".join(\" \".join(context_sentences) for context_sentences in train_case.get(\"context\")[\"sentences\"])\n",
|
|
"\n",
|
|
"# texts_to_add.append(train_case_text)\n",
|
|
"\n",
|
|
"# Later cells refer to this dataset by the literal name short_stories rather than by this variable.\n",
"dataset_name = \"short_stories\"\n",
|
|
"# Passes a data:// URI built from the absolute data directory path;\n",
"# presumably cognee ingests the files found there - TODO confirm against cognee.add.\n",
"await cognee.add(\"data://\" + data_directory_path, dataset_name)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "44603a2a",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-30T16:53:16.917678Z",
|
|
"start_time": "2024-04-30T16:53:14.700232Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Inspection cell: print the datasets cognee reports and the number of items in short_stories.\n",
"# Repeats the directory configuration from the setup cell instead of reusing its variables.\n",
"from os import path\n",
|
|
"import cognee\n",
|
|
"\n",
|
|
"# Relative paths: abspath() resolves them against the kernel's current working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
|
|
"cognee.config.data_root_directory(data_directory_path)\n",
|
|
"\n",
|
|
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
|
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
|
"\n",
|
|
"print(cognee.datasets.list_datasets())\n",
|
|
"\n",
|
|
"# The dataset name must match the one passed to cognee.add in the setup cell.\n",
"# NOTE(review): the result only needs to support len() here; its element type is not visible.\n",
"train_dataset = cognee.datasets.query_data(\"short_stories\")\n",
|
|
"print(len(train_dataset))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "65bfaf09",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-30T16:55:30.886217Z",
|
|
"start_time": "2024-04-30T16:53:19.164943Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Processing cell: run cognee.cognify on the short_stories dataset with INFO-level logging.\n",
"# The recorded ExecuteTime metadata for this cell spans roughly two minutes.\n",
"# NOTE: uses top-level await, so it needs a kernel with autoawait (e.g. ipykernel).\n",
"from os import path\n",
|
|
"import logging\n",
|
|
"import cognee\n",
|
|
"\n",
|
|
"# basicConfig only takes effect if the root logger has no handlers configured yet.\n",
"logging.basicConfig(level = logging.INFO)\n",
|
|
"\n",
|
|
"# Relative paths: abspath() resolves them against the kernel's current working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
|
|
"cognee.config.data_root_directory(data_directory_path)\n",
|
|
"\n",
|
|
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
|
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
|
"\n",
|
|
"# NOTE(review): prune_system() runs again right before cognify - confirm it does not discard\n",
"# what the earlier cognee.add call registered for short_stories.\n",
"await cognee.prune.prune_system()\n",
|
|
"\n",
|
|
"# Presumably builds the knowledge graph for the named dataset - TODO confirm against cognee.cognify.\n",
"await cognee.cognify(\"short_stories\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a514cf38",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-30T16:55:39.819971Z",
|
|
"start_time": "2024-04-30T16:55:35.627964Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Visualisation cell: register with Graphistry using credentials from cognee's Config,\n",
"# obtain the NetworkX graph client, and pass its graph to render_graph.\n",
"# NOTE: uses top-level await, so it needs a kernel with autoawait (e.g. ipykernel).\n",
"import graphistry\n",
|
|
"from cognee.config import Config\n",
|
|
"from cognee.utils import render_graph\n",
|
|
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
|
"\n",
|
|
"# Credentials are read from the loaded Config object rather than hardcoded in the notebook.\n",
"config = Config()\n",
|
|
"config.load()\n",
|
|
"\n",
|
|
"graphistry.register(\n",
|
|
" api = 3,\n",
|
|
" username = config.graphistry_username,\n",
|
|
" password = config.graphistry_password\n",
|
|
")\n",
|
|
"\n",
|
|
"# NOTE(review): this call passes cognee_graph.pkl explicitly, while the search cell below calls\n",
"# get_graph_client(GraphDBType.NETWORKX) without it - confirm both resolve to the same graph.\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX, \"cognee_graph.pkl\")\n",
|
|
"graph = graph_client.graph\n",
|
|
"\n",
|
|
"await render_graph(graph)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e916c484",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Search cell: run search_similarity with a fixed question against the NetworkX graph\n",
"# and print every returned result.\n",
"# NOTE: uses top-level await, so it needs a kernel with autoawait (e.g. ipykernel).\n",
"from os import path\n",
|
|
"import cognee\n",
|
|
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
|
"from cognee.modules.search.vector.search_similarity import search_similarity\n",
|
|
"\n",
|
|
"# Relative paths: abspath() resolves them against the kernel's current working directory.\n",
"data_directory_path = path.abspath(\"../.data\")\n",
|
|
"cognee.config.data_root_directory(data_directory_path)\n",
|
|
"\n",
|
|
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
|
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
|
"\n",
|
|
"# NOTE(review): the render cell above passes cognee_graph.pkl as a second argument; here it is omitted.\n",
"graph_client = await get_graph_client(GraphDBType.NETWORKX)\n",
|
|
"graph = graph_client.graph\n",
|
|
"\n",
|
|
"results = await search_similarity(\"Who are French girls?\", graph)\n",
|
|
"\n",
|
|
"# For each result, print whether the literal French girls is contained in it, then the result itself.\n",
"# NOTE(review): what the in check means depends on the result type (str, dict, ...) - not visible here.\n",
"for result in results:\n",
|
|
" print(\"French girls\" in result)\n",
|
|
" print(result)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e5e44018878d383f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|