Fix poetry deps
This commit is contained in:
parent
ee108f039d
commit
84cab02c34
1 changed files with 67 additions and 38 deletions
|
|
@ -192,63 +192,92 @@
|
||||||
"We load the data from a local folder"
|
"We load the data from a local folder"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "3a64a838-d02a-4f72-ad30-8732f4445930",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import iso639"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
"id": "5dffa4ac-9c10-4b88-a324-eac390294224",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
"id": "5b3954c1-f537-4be7-a578-1d5037c21374",
|
"id": "5b3954c1-f537-4be7-a578-1d5037c21374",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[nltk_data] Downloading package punkt to /Users/vasa/nltk_data...\n",
|
||||||
|
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
|
||||||
|
"[nltk_data] Downloading package averaged_perceptron_tagger to\n",
|
||||||
|
"[nltk_data] /Users/vasa/nltk_data...\n",
|
||||||
|
"[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Pipeline file_load_from_filesystem load step completed in 0.38 seconds\n",
|
||||||
|
"1 load package(s) were loaded to destination duckdb and into dataset izmene\n",
|
||||||
|
"The duckdb destination used duckdb:///:external: location to store data\n",
|
||||||
|
"Load package 1710323687.737153 is LOADED and contains no failed jobs\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from os import listdir, path\n",
|
"from os import listdir, path\n",
|
||||||
"from uuid import uuid5, UUID\n",
|
"from uuid import uuid5, UUID\n",
|
||||||
"from cognitive_architecture import add\n",
|
"from cognitive_architecture import add\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# we define the folder where our data is\n",
|
||||||
"data_path = path.abspath(\".data\")\n",
|
"data_path = path.abspath(\".data\")\n",
|
||||||
"pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"await add(\n",
|
"# and then we simply add the data to cognee\n",
|
||||||
" list(map(\n",
|
"results = await add(data_path, \"izmene\")\n",
|
||||||
" lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
|
|
||||||
" pdf_files\n",
|
|
||||||
" ))[:3],\n",
|
|
||||||
" uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n",
|
|
||||||
" \"test-dataset\"\n",
|
|
||||||
")\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "a0732b21-5c41-4427-ad1c-7df86245217d",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from os import listdir, path\n",
|
|
||||||
"from uuid import uuid5, UUID\n",
|
|
||||||
"from cognitive_architecture import add\n",
|
|
||||||
"\n",
|
|
||||||
"data_path = path.abspath(\".data\")\n",
|
|
||||||
"# pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
|
|
||||||
"\n",
|
|
||||||
"# await add_dlt(\n",
|
|
||||||
"# list(map(\n",
|
|
||||||
"# lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
|
|
||||||
"# pdf_files\n",
|
|
||||||
"# ))[:5],\n",
|
|
||||||
"# \"pdf_files\"\n",
|
|
||||||
"# )\n",
|
|
||||||
"\n",
|
|
||||||
"results = await add(data_path, \"pravilnik.energetska_efikasnost\")\n",
|
|
||||||
"for result in results:\n",
|
"for result in results:\n",
|
||||||
" print(result)\n"
|
" print(result)\n",
|
||||||
|
"\n",
|
||||||
|
"# data gets processed with unstructured and dlt"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 6,
|
||||||
"id": "39df49ca-06f0-4b86-ae27-93c68ddceac3",
|
"id": "39df49ca-06f0-4b86-ae27-93c68ddceac3",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/Users/vasa/Projects/cognee/cognitive_architecture/data/cognee/cognee.duckdb\n",
|
||||||
|
"['izmene']\n",
|
||||||
|
"┌──────────────────────┬──────────────────────┬──────────────────────┬───┬───────────────────┬────────────────┐\n",
|
||||||
|
"│ id │ name │ file_path │ … │ _dlt_load_id │ _dlt_id │\n",
|
||||||
|
"│ varchar │ varchar │ varchar │ │ varchar │ varchar │\n",
|
||||||
|
"├──────────────────────┼──────────────────────┼──────────────────────┼───┼───────────────────┼────────────────┤\n",
|
||||||
|
"│ 881ecb36-2819-54c3… │ Izmenjen-clan-17-P… │ /Users/vasa/Projec… │ … │ 1710323687.737153 │ hvdIIsqChgxUUg │\n",
|
||||||
|
"├──────────────────────┴──────────────────────┴──────────────────────┴───┴───────────────────┴────────────────┤\n",
|
||||||
|
"│ 1 rows 8 columns (5 shown) │\n",
|
||||||
|
"└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import duckdb\n",
|
"import duckdb\n",
|
||||||
"from cognitive_architecture.root_dir import get_absolute_path\n",
|
"from cognitive_architecture.root_dir import get_absolute_path\n",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue