156 lines
8.7 KiB
Text
156 lines
8.7 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "823c799a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from os import listdir, path\n",
"from uuid import uuid5, UUID\n",
"from cognitive_architecture import add\n",
"\n",
"# Pass `add` file:// URLs for up to three files from the local `.data` directory.\n",
"data_path = path.abspath(\".data\")\n",
"\n",
"# os.listdir returns entries in arbitrary order, so sort before slicing to make\n",
"# the three-file sample the same on every machine and every run.\n",
"pdf_files = sorted(\n",
"    file_name for file_name in listdir(data_path)\n",
"    if path.isfile(path.join(data_path, file_name))\n",
")\n",
"sample_file_urls = [\n",
"    f\"file://{path.join(data_path, file_name)}\" for file_name in pdf_files[:3]\n",
"]\n",
"\n",
"await add(\n",
"    sample_file_urls,\n",
"    # uuid5 is name-based, so this identifier is identical on every run.\n",
"    uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n",
"    \"test-dataset\"\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "c4d5a399",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Pipeline file_load_from_filesystem load step completed in 0.39 seconds\n",
|
|
"1 load package(s) were loaded to destination duckdb and into dataset pravilnik_energetska_efikasnost\n",
|
|
"The duckdb destination used duckdb:///:external: location to store data\n",
|
|
"Load package 1710243999.1369941 is LOADED and contains no failed jobs\n",
|
|
"Pipeline file_load_from_filesystem load step completed in 0.40 seconds\n",
|
|
"1 load package(s) were loaded to destination duckdb and into dataset pravilnik_energetska_efikasnost_sertifikati\n",
|
|
"The duckdb destination used duckdb:///:external: location to store data\n",
|
|
"Load package 1710246971.055336 is LOADED and contains no failed jobs\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"from os import listdir, path\n",
"from uuid import uuid5, UUID\n",
"from cognitive_architecture import add_dlt\n",
"\n",
"# Hand the whole local `.data` directory to add_dlt under the\n",
"# \"pravilnik.energetska_efikasnost\" name, then print every returned item.\n",
"# (An earlier draft of this cell passed a list of file:// URLs for individual\n",
"# files instead of the directory path.)\n",
"data_path = path.abspath(\".data\")\n",
"\n",
"results = await add_dlt(data_path, \"pravilnik.energetska_efikasnost\")\n",
"for load_result in results:\n",
"    print(load_result)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "47edce91",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/Users/borisarzentar/Projects/Topoteretes/cognee/cognitive_architecture/data/cognee/cognee.duckdb\n",
|
|
"┌──────────────────────┬──────────────────────┬──────────────────────┬───┬───────────────────┬────────────────┐\n",
|
|
"│ id │ name │ file_path │ … │ _dlt_load_id │ _dlt_id │\n",
|
|
"│ varchar │ varchar │ varchar │ │ varchar │ varchar │\n",
|
|
"├──────────────────────┼──────────────────────┼──────────────────────┼───┼───────────────────┼────────────────┤\n",
|
|
"│ 881ecb36-2819-54c3… │ Izmenjen-clan-17-P… │ /Users/borisarzent… │ … │ 1710242259.670676 │ /LPIFEK4ayoivQ │\n",
|
|
"├──────────────────────┴──────────────────────┴──────────────────────┴───┴───────────────────┴────────────────┤\n",
|
|
"│ 1 rows 8 columns (5 shown) │\n",
|
|
"└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n",
|
|
"\n",
|
|
"┌──────────────────────┬──────────────────────┬──────────────────────┬───┬────────────────────┬────────────────┐\n",
|
|
"│ id │ name │ file_path │ … │ _dlt_load_id │ _dlt_id │\n",
|
|
"│ varchar │ varchar │ varchar │ │ varchar │ varchar │\n",
|
|
"├──────────────────────┼──────────────────────┼──────────────────────┼───┼────────────────────┼────────────────┤\n",
|
|
"│ cd1dc11f-397b-5048… │ Pravilnik-o-sadrzi… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ TrxbqUr6PepbTA │\n",
|
|
"│ 320cee87-d02e-540d… │ Pravilnik-o-nacinu… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ njwnSKr24K1vEQ │\n",
|
|
"│ bfe85a36-1427-555d… │ Pravilnik-o-izgled… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ hYgSJPKZJoQNHQ │\n",
|
|
"│ 5767b799-9815-5834… │ Pravilnik-o-postup… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ CAR2xYK53eR9Wg │\n",
|
|
"│ 9133b38e-d2aa-5916… │ Pravilnik-o-uslovi… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ aiDe0Ggk5JMN/w │\n",
|
|
"│ bff79816-8610-5dfa… │ Pravilnik-o-sadrzi… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ Fi2fQVYOI1lZ8w │\n",
|
|
"│ d3fbcf40-abcc-56d4… │ Pravilnik-o-uslovi… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ aw1WBpNI62y+Kg │\n",
|
|
"│ 826bbd41-e322-5587… │ Pravilnik-o-katast… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ S5QOxjEv51lBBw │\n",
|
|
"│ f354abe5-bc7e-520f… │ Pravilnik-o-objekt… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ B5CinyB0UGlbng │\n",
|
|
"│ 1e47801b-2a4f-57cf… │ Pravilnik-o-sadrzi… │ /Users/borisarzent… │ … │ 1710243553.0357041 │ w6CcdYAB8ie+xw │\n",
|
|
"├──────────────────────┴──────────────────────┴──────────────────────┴───┴────────────────────┴────────────────┤\n",
|
|
"│ 10 rows 8 columns (5 shown) │\n",
|
|
"└──────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"import duckdb\n",
"from cognitive_architecture.root_dir import get_absolute_path\n",
"\n",
"# NOTE(review): dataset_name is not referenced by the queries below; it is kept\n",
"# only so the notebook namespace stays unchanged.\n",
"dataset_name = \"pdf_files\"\n",
"\n",
"db_path = get_absolute_path(\"./data/cognee\")\n",
"db_location = db_path + \"/cognee.duckdb\"\n",
"print(db_location)\n",
"\n",
"# Open the database in a with-block so the connection is closed as soon as\n",
"# both tables have been printed, instead of staying open in the kernel.\n",
"with duckdb.connect(db_location) as db:\n",
"    # The relations are printed inside the block, while the connection is open.\n",
"    izmene = db.sql(\"SELECT * FROM izmene.file_metadata;\")\n",
"    print(izmene)\n",
"\n",
"    pravilnik = db.sql(\"SELECT * FROM pravilnik.file_metadata;\")\n",
"    print(pravilnik)\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.13"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|