{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "823c799a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Register a small, reproducible sample of local files through `add`.\n",
    "from os import listdir, path\n",
    "from uuid import uuid5, UUID\n",
    "from cognitive_architecture import add\n",
    "\n",
    "# Upper bound on how many files this demo registers.\n",
    "MAX_FILES = 3\n",
    "\n",
    "data_path = path.abspath(\".data\")\n",
    "\n",
    "# os.listdir() returns entries in arbitrary order, so sort before slicing;\n",
    "# otherwise the selected sample can differ between runs and machines.\n",
    "# NOTE(review): despite the name, no extension filter is applied here - every\n",
    "# regular file in `.data` is a candidate. Confirm whether only *.pdf is intended.\n",
    "pdf_files = sorted(\n",
    "    file for file in listdir(data_path)\n",
    "    if path.isfile(path.join(data_path, file))\n",
    ")\n",
    "\n",
    "# Build file:// URLs only for the files that are actually passed on.\n",
    "file_urls = [\n",
    "    f\"file://{path.join(data_path, file_name)}\"\n",
    "    for file_name in pdf_files[:MAX_FILES]\n",
    "]\n",
    "\n",
    "await add(\n",
    "    file_urls,\n",
    "    # Deterministic UUIDv5 derived from the nil namespace and a fixed name.\n",
    "    uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n",
    "    \"test-dataset\",\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4d5a399",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand the whole `.data` directory to `add_dlt` under a single dataset name\n",
    "# and print every result it returns.\n",
    "from os import path\n",
    "from cognitive_architecture import add_dlt\n",
    "\n",
    "data_path = path.abspath(\".data\")\n",
    "\n",
    "results = await add_dlt(data_path, \"pravilnik.energetska_efikasnost\")\n",
    "for result in results:\n",
    "    print(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47edce91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the `file_metadata` table of each dataset schema stored in DuckDB.\n",
    "from os import path\n",
    "\n",
    "import duckdb\n",
    "from cognitive_architecture.root_dir import get_absolute_path\n",
    "\n",
    "# Schemas to inspect.\n",
    "# NOTE(review): these names differ from the dataset name passed to `add_dlt`\n",
    "# in the previous cell (\"pravilnik.energetska_efikasnost\"); presumably they\n",
    "# come from earlier loads - confirm they exist on a fresh database.\n",
    "DATASET_SCHEMAS = (\"izmene\", \"pravilnik\")\n",
    "\n",
    "db_location = path.join(get_absolute_path(\"./data/cognee\"), \"cognee.duckdb\")\n",
    "print(db_location)\n",
    "\n",
    "# The context manager closes the connection when the block exits, so the\n",
    "# kernel does not keep the database file open after this cell has run.\n",
    "with duckdb.connect(db_location) as db:\n",
    "    for schema in DATASET_SCHEMAS:\n",
    "        # Print inside the block: the relation needs the open connection.\n",
    "        print(db.sql(f\"SELECT * FROM {schema}.file_metadata;\"))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}