Fix poetry deps

2024-03-13 12:42:25 +01:00 · 2024-03-13 12:42:25 +01:00 · 84cab02c34
commit 84cab02c34
parent ee108f039d
1 changed file with 67 additions and 38 deletions
--- a/Started.ipynb
+++ b/Started.ipynb
@@ -192,63 +192,92 @@
    "We load the data from a local folder"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3a64a838-d02a-4f72-ad30-8732f4445930",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import iso639"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "id": "5dffa4ac-9c10-4b88-a324-eac390294224",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
   "id": "5b3954c1-f537-4be7-a578-1d5037c21374",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package punkt to /Users/vasa/nltk_data...\n",
+      "[nltk_data]   Unzipping tokenizers/punkt.zip.\n",
+      "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
+      "[nltk_data]     /Users/vasa/nltk_data...\n",
+      "[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pipeline file_load_from_filesystem load step completed in 0.38 seconds\n",
+      "1 load package(s) were loaded to destination duckdb and into dataset izmene\n",
+      "The duckdb destination used duckdb:///:external: location to store data\n",
+      "Load package 1710323687.737153 is LOADED and contains no failed jobs\n"
+     ]
+    }
+   ],
   "source": [
    "from os import listdir, path\n",
    "from uuid import uuid5, UUID\n",
    "from cognitive_architecture import add\n",
    "\n",
+    "# we define the folder where our data is\n",
    "data_path = path.abspath(\".data\")\n",
-    "pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
    "\n",
-    "await add(\n",
-    "    list(map(\n",
-    "        lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
-    "        pdf_files\n",
-    "    ))[:3],\n",
-    "    uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n",
-    "    \"test-dataset\"\n",
-    ")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a0732b21-5c41-4427-ad1c-7df86245217d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from os import listdir, path\n",
-    "from uuid import uuid5, UUID\n",
-    "from cognitive_architecture import add\n",
-    "\n",
-    "data_path = path.abspath(\".data\")\n",
-    "# pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
-    "\n",
-    "# await add_dlt(\n",
-    "#     list(map(\n",
-    "#         lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
-    "#         pdf_files\n",
-    "#     ))[:5],\n",
-    "#     \"pdf_files\"\n",
-    "# )\n",
-    "\n",
-    "results = await add(data_path, \"pravilnik.energetska_efikasnost\")\n",
+    "# and then we simply add the data to cognee\n",
+    "results = await add(data_path, \"izmene\")\n",
    "for result in results:\n",
-    "    print(result)\n"
+    "    print(result)\n",
+    "\n",
+    "# data gets processed with unstructured and dlt"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "id": "39df49ca-06f0-4b86-ae27-93c68ddceac3",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/Users/vasa/Projects/cognee/cognitive_architecture/data/cognee/cognee.duckdb\n",
+      "['izmene']\n",
+      "┌──────────────────────┬──────────────────────┬──────────────────────┬───┬───────────────────┬────────────────┐\n",
+      "│          id          │         name         │      file_path       │ … │   _dlt_load_id    │    _dlt_id     │\n",
+      "│       varchar        │       varchar        │       varchar        │   │      varchar      │    varchar     │\n",
+      "├──────────────────────┼──────────────────────┼──────────────────────┼───┼───────────────────┼────────────────┤\n",
+      "│ 881ecb36-2819-54c3…  │ Izmenjen-clan-17-P…  │ /Users/vasa/Projec…  │ … │ 1710323687.737153 │ hvdIIsqChgxUUg │\n",
+      "├──────────────────────┴──────────────────────┴──────────────────────┴───┴───────────────────┴────────────────┤\n",
+      "│ 1 rows                                                                                  8 columns (5 shown) │\n",
+      "└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n",
+      "\n"
+     ]
+    }
+   ],
   "source": [
    "import duckdb\n",
    "from cognitive_architecture.root_dir import get_absolute_path\n",