137 lines
5.6 KiB
Text
137 lines
5.6 KiB
Text
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "823c799a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from os import listdir, path\n",
|
||
"from uuid import uuid5, UUID\n",
|
||
"from cognitive_architecture import add\n",
|
||
"\n",
|
||
"data_path = path.abspath(\".data\")\n",
|
||
"pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
|
||
"\n",
|
||
"await add(\n",
|
||
" list(map(\n",
|
||
" lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
|
||
" pdf_files\n",
|
||
" ))[:3],\n",
|
||
" uuid5(UUID(\"00000000-0000-0000-0000-000000000000\"), \"pdf_files_cognee\"),\n",
|
||
" \"test-dataset\"\n",
|
||
")\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c4d5a399",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from os import listdir, path\n",
|
||
"from uuid import uuid5, UUID\n",
|
||
"from cognitive_architecture import add_dlt\n",
|
||
"\n",
|
||
"data_path = path.abspath(\".data\")\n",
|
||
"# pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
|
||
"\n",
|
||
"# await add_dlt(\n",
|
||
"# list(map(\n",
|
||
"# lambda file_path: f\"file://{path.join(data_path, file_path)}\",\n",
|
||
"# pdf_files\n",
|
||
"# ))[:5],\n",
|
||
"# \"pdf_files\"\n",
|
||
"# )\n",
|
||
"\n",
|
||
"results = await add_dlt(data_path, \"pravilnik.energetska_efikasnost\")\n",
|
||
"for result in results:\n",
|
||
" print(result)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "47edce91",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import duckdb\n",
|
||
"from cognitive_architecture.root_dir import get_absolute_path\n",
|
||
"\n",
|
||
"dataset_name = \"pdf_files\"\n",
|
||
"\n",
|
||
"db_path = get_absolute_path(\"./data/cognee\")\n",
|
||
"db_location = db_path + \"/cognee.duckdb\"\n",
|
||
"print(db_location)\n",
|
||
"\n",
|
||
"db = duckdb.connect(db_location)\n",
|
||
"\n",
|
||
"izmene = db.sql(f\"SELECT * FROM izmene.file_metadata;\")\n",
|
||
"\n",
|
||
"print(izmene)\n",
|
||
"\n",
|
||
"pravilnik = db.sql(f\"SELECT * FROM pravilnik.file_metadata;\")\n",
|
||
"\n",
|
||
"print(pravilnik)\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "607bf624",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from os import path, listdir\n",
|
||
"from cognitive_architecture import cognify\n",
|
||
"from unstructured.cleaners.core import clean\n",
|
||
"from unstructured.partition.pdf import partition_pdf\n",
|
||
"from cognitive_architecture.utils import render_graph\n",
|
||
"\n",
|
||
"data_path = path.abspath(\".data/izmene\")\n",
|
||
"pdf_files = [file for file in listdir(data_path) if path.isfile(path.join(data_path, file))]\n",
|
||
"\n",
|
||
"with open(path.join(data_path, pdf_files[0]), mode = \"rb\") as file:\n",
|
||
" # elements = partition_pdf(file = file, strategy = \"fast\")\n",
|
||
" # text = \"\\n\".join(map(lambda element: clean(element.text), elements))\n",
|
||
"\n",
|
||
" text = \"\"\"In the nicest possible way, Britons have always been a bit silly about animals. “Keeping pets, for the English, is not so much a leisure activity as it is an entire way of life,” wrote the anthropologist Kate Fox in Watching the English, nearly 20 years ago. Our dogs, in particular, have been an acceptable outlet for emotions and impulses we otherwise keep strictly controlled – our latent desire to be demonstratively affectionate, to be silly and chat to strangers. If this seems like an exaggeration, consider the different reactions you’d get if you struck up a conversation with someone in a park with a dog, versus someone on the train.\n",
|
||
"Indeed, British society has been set up to accommodate these four-legged ambassadors. In the UK – unlike Australia, say, or New Zealand – dogs are not just permitted on public transport but often openly encouraged. Many pubs and shops display waggish signs, reading, “Dogs welcome, people tolerated”, and have treat jars on their counters. The other day, as I was waiting outside a cafe with a friend’s dog, the barista urged me to bring her inside.\n",
|
||
"For years, Britons’ non-partisan passion for animals has been consistent amid dwindling common ground. But lately, rather than bringing out the best in us, our relationship with dogs is increasingly revealing us at our worst – and our supposed “best friends” are paying the price.\n",
|
||
"As with so many latent traits in the national psyche, it all came unleashed with the pandemic, when many people thought they might as well make the most of all that time at home and in local parks with a dog. Between 2019 and 2022, the number of pet dogs in the UK rose from about nine million to 13 million. But there’s long been a seasonal surge around this time of year, substantial enough for the Dogs Trust charity to coin its famous slogan back in 1978: “A dog is for life, not just for Christmas.”\"\"\"\n",
|
||
"\n",
|
||
" graph = await cognify(text)\n",
|
||
"\n",
|
||
" graph_url = await render_graph(graph, graph_type = \"networkx\")\n",
|
||
" print(graph_url)\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|