From 84cab02c3400ff524e4808562cacf49e859713ba Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 13 Mar 2024 12:42:25 +0100 Subject: [PATCH] Fix poetry deps --- cognee - Get Started.ipynb | 105 +++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 38 deletions(-) diff --git a/cognee - Get Started.ipynb b/cognee - Get Started.ipynb index 3510555fd..feb772b67 100644 --- a/cognee - Get Started.ipynb +++ b/cognee - Get Started.ipynb @@ -192,63 +192,92 @@ "We load the data from a local folder" ] }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a64a838-d02a-4f72-ad30-8732f4445930", + "metadata": {}, + "outputs": [], + "source": [ + "import iso639" + ] + }, { "cell_type": "code", "execution_count": null, + "id": "5dffa4ac-9c10-4b88-a324-eac390294224", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "5b3954c1-f537-4be7-a578-1d5037c21374", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /Users/vasa/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n", + "[nltk_data] Downloading package averaged_perceptron_tagger to\n", + "[nltk_data] /Users/vasa/nltk_data...\n", + "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline file_load_from_filesystem load step completed in 0.38 seconds\n", + "1 load package(s) were loaded to destination duckdb and into dataset izmene\n", + "The duckdb destination used duckdb:///:external: location to store data\n", + "Load package 1710323687.737153 is LOADED and contains no failed jobs\n" + ] + } + ], "source": [ "from os import listdir, path\n", "from uuid import uuid5, UUID\n", "from cognitive_architecture import add\n", "\n", + "# we define the folder where our data is\n", "data_path = path.abspath(\".data\")\n", - "pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n", "\n", - "await add(\n", - " list(map(\n", - " lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n", - " pdf_files\n", - " ))[:3],\n", - " uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n", - " \"test-dataset\"\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0732b21-5c41-4427-ad1c-7df86245217d", - "metadata": {}, - "outputs": [], - "source": [ - "from os import listdir, path\n", - "from uuid import uuid5, UUID\n", - "from cognitive_architecture import add\n", - "\n", - "data_path = path.abspath(\".data\")\n", - "# pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n", - "\n", - "# await add_dlt(\n", - "# list(map(\n", - "# lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n", - "# pdf_files\n", - "# ))[:5],\n", - "# \"pdf_files\"\n", - "# )\n", - "\n", - "results = await add(data_path, \"pravilnik.energetska_efikasnost\")\n", + "# and then we simply add the data to cognee\n", + "results = await add(data_path, \"izmene\")\n", "for result in results:\n", - " print(result)\n" + " print(result)\n", + "\n", + "# data gets processed with unstructured and dlt" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "39df49ca-06f0-4b86-ae27-93c68ddceac3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/vasa/Projects/cognee/cognitive_architecture/data/cognee/cognee.duckdb\n", + "['izmene']\n", + "┌──────────────────────┬──────────────────────┬──────────────────────┬───┬───────────────────┬────────────────┐\n", + "│ id │ name │ file_path │ … │ _dlt_load_id │ _dlt_id │\n", + "│ varchar │ varchar │ varchar │ │ varchar │ varchar │\n", + "├──────────────────────┼──────────────────────┼──────────────────────┼───┼───────────────────┼────────────────┤\n", + "│ 881ecb36-2819-54c3… │ Izmenjen-clan-17-P… │ /Users/vasa/Projec… │ … │ 1710323687.737153 │ hvdIIsqChgxUUg │\n", + "├──────────────────────┴──────────────────────┴──────────────────────┴───┴───────────────────┴────────────────┤\n", + "│ 1 rows 8 columns (5 shown) │\n", + "└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n", + "\n" + ] + } + ], "source": [ "import duckdb\n", "from cognitive_architecture.root_dir import get_absolute_path\n",