diff --git a/.env.template b/.env.template index 085dd54cf..43cdedee2 100644 --- a/.env.template +++ b/.env.template @@ -18,8 +18,14 @@ VECTOR_ENGINE_PROVIDER="qdrant" # or "weaviate" or "lancedb" VECTOR_DB_URL= VECTOR_DB_KEY= +# Database provider +DB_PROVIDER="sqlite" # or "postgres" + +# Database name +DB_NAME=cognee_db + +# Postgres specific parameters (Only if Postgres is run) DB_HOST=127.0.0.1 DB_PORT=5432 DB_USERNAME=cognee DB_PASSWORD=cognee -DB_DATABASE=cognee_db diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 1f2046baa..85f0688ac 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -8,8 +8,9 @@ from cognee.infrastructure.files.storage import LocalStorage from cognee.modules.ingestion import get_matched_datasets, save_data_to_file from cognee.shared.utils import send_telemetry from cognee.base_config import get_base_config -from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine, create_db_and_tables +from cognee.infrastructure.databases.relational import get_relational_engine, create_db_and_tables from cognee.modules.users.methods import get_default_user +from cognee.tasks.ingestion import get_dlt_destination from cognee.modules.users.permissions.methods import give_permission_on_document from cognee.modules.users.models import User from cognee.modules.data.methods import create_dataset @@ -78,18 +79,7 @@ async def add_files(file_paths: List[str], dataset_name: str, user: User = None) else: processed_file_paths.append(file_path) - relational_config = get_relational_config() - - destination = dlt.destinations.sqlalchemy( - credentials = { - "host": relational_config.db_host, - "port": relational_config.db_port, - "username": relational_config.db_username, - "password": relational_config.db_password, - "database": relational_config.db_name, - "drivername": relational_config.db_provider, - }, - ) + destination = get_dlt_destination() pipeline = dlt.pipeline( pipeline_name = 
"file_load_from_filesystem", diff --git a/cognee/infrastructure/databases/relational/data_types/UUID.py b/cognee/infrastructure/databases/relational/data_types/UUID.py index 5cd24c3d3..722204b37 100644 --- a/cognee/infrastructure/databases/relational/data_types/UUID.py +++ b/cognee/infrastructure/databases/relational/data_types/UUID.py @@ -38,6 +38,8 @@ class UUID(TypeDecorator): if value is None: return value if dialect.name == 'postgresql': + if isinstance(value, uuid.UUID): + return value return uuid.UUID(value) else: return uuid.UUID(bytes = value) diff --git a/cognee/tasks/ingestion/get_dlt_destination.py b/cognee/tasks/ingestion/get_dlt_destination.py new file mode 100644 index 000000000..97e3d3220 --- /dev/null +++ b/cognee/tasks/ingestion/get_dlt_destination.py @@ -0,0 +1,41 @@ +import os +import dlt +from typing import Union + +from cognee.infrastructure.databases.relational import get_relational_config + +def get_dlt_destination() -> Union[type[dlt.destinations.sqlalchemy], None]: + """ + Handles propagation of the cognee database configuration to the dlt library + + Returns: + sqlalchemy: sqlalchemy destination used by the dlt library + + """ + relational_config = get_relational_config() + + if relational_config.db_provider == "sqlite": + # When sqlite is the database provider, hostname, port, username and password should not be forwarded. 
+ # The database is found by combining the path location and the database name + destination = dlt.destinations.sqlalchemy( + credentials = { + "database": os.path.join(relational_config.db_path, relational_config.db_name), + "drivername": relational_config.db_provider, + }, + ) + elif relational_config.db_provider == "postgres": + # The dlt library doesn't accept postgres as the drivername, it only accepts postgresql + destination = dlt.destinations.sqlalchemy( + credentials = { + "host": relational_config.db_host, + "port": relational_config.db_port, + "username": relational_config.db_username, + "password": relational_config.db_password, + "database": relational_config.db_name, + "drivername": "postgresql", + }, + ) + else: + destination = None + + return destination diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index a7ccdd8fa..2c0eba4e8 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -6,20 +6,10 @@ from cognee.modules.users.models import User from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine from cognee.modules.data.methods import create_dataset from cognee.modules.users.permissions.methods import give_permission_on_document +from .get_dlt_destination import get_dlt_destination async def ingest_data(file_paths: list[str], dataset_name: str, user: User): - relational_config = get_relational_config() - - destination = dlt.destinations.sqlalchemy( - credentials = { - "host": relational_config.db_host, - "port": relational_config.db_port, - "username": relational_config.db_username, - "password": relational_config.db_password, - "database": relational_config.db_name, - "drivername": relational_config.db_provider, - }, - ) + destination = get_dlt_destination() pipeline = dlt.pipeline( pipeline_name = "file_load_from_filesystem", diff --git a/notebooks/cognee_demo_1.5.ipynb b/notebooks/cognee_demo_1.5.ipynb index 
e7850edc8..8d4cbdc12 100644 --- a/notebooks/cognee_demo_1.5.ipynb +++ b/notebooks/cognee_demo_1.5.ipynb @@ -1,44 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "958375a6ffc0c2e4", - "metadata": { - "ExecuteTime": { - "end_time": "2024-09-20T14:02:47.336283Z", - "start_time": "2024-09-20T14:02:43.652444Z" - } - }, - "outputs": [], - "source": [ - "import asyncio\n", - "import logging\n", - "from typing import Union\n", - "\n", - "from cognee.modules.cognify.config import get_cognify_config\n", - "from cognee.shared.data_models import KnowledgeGraph\n", - "from cognee.modules.data.models import Dataset, Data\n", - "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n", - "from cognee.modules.data.methods import get_datasets, get_datasets_by_name\n", - "from cognee.modules.pipelines.tasks.Task import Task\n", - "from cognee.modules.pipelines import run_tasks, run_tasks_parallel\n", - "from cognee.modules.users.models import User\n", - "from cognee.modules.users.methods import get_default_user\n", - "from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status\n", - "from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status\n", - "from cognee.tasks import chunk_extract_summary, \\\n", - " chunk_naive_llm_classifier, \\\n", - " chunk_remove_disconnected, \\\n", - " infer_data_ontology, \\\n", - " save_chunks_to_store, \\\n", - " chunk_update_check, \\\n", - " chunks_into_graph, \\\n", - " source_documents_to_chunks, \\\n", - " check_permissions_on_documents, \\\n", - " classify_documents" - ] - }, { "cell_type": "code", "execution_count": 2, @@ -308,64 +269,8 @@ "outputs": [], "source": [ "import cognee\n", - "from os import listdir, path\n", "\n", - "data_path = path.abspath(\".data\")\n", - "\n", - "results = await cognee.add([job_1, job_2,job_3,job_4,job_5,job_position], \"example\")\n", - "\n", - "for result in results:\n", - " print(result)" - ] - }, - { 
- "cell_type": "code", - "execution_count": 8, - "id": "6f9b564de121713d", - "metadata": { - "ExecuteTime": { - "end_time": "2024-09-20T14:02:55.564445Z", - "start_time": "2024-09-20T14:02:55.562784Z" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8911f8bd4f8c440a", - "metadata": { - "ExecuteTime": { - "end_time": "2024-09-20T14:02:56.714408Z", - "start_time": "2024-09-20T14:02:56.711812Z" - } - }, - "outputs": [], - "source": [ - "# from enum import Enum, auto\n", - "# from typing import Optional, List, Union, Dict, Any\n", - "# from pydantic import BaseModel, Field\n", - "# \n", - "# class Node(BaseModel):\n", - "# \"\"\"Node in a knowledge graph.\"\"\"\n", - "# id: str\n", - "# name: str\n", - "# type: str\n", - "# description: str\n", - "# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n", - "# \n", - "# class Edge(BaseModel):\n", - "# \"\"\"Edge in a knowledge graph.\"\"\"\n", - "# source_node_id: str\n", - "# target_node_id: str\n", - "# relationship_name: str\n", - "# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n", - "# \n", - "# class KnowledgeGraph(BaseModel):\n", - "# \"\"\"Knowledge graph.\"\"\"\n", - "# nodes: List[Node] = Field(..., default_factory=list)\n", - "# edges: List[Edge] = Field(..., default_factory=list)" + "await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], \"example\")\n" ] }, { @@ -380,6 +285,21 @@ }, "outputs": [], "source": [ + "from cognee.shared.data_models import KnowledgeGraph\n", + "from cognee.modules.data.models import Dataset, Data\n", + "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n", + "from cognee.modules.pipelines.tasks.Task import Task\n", + "from cognee.modules.pipelines import run_tasks\n", + "from cognee.modules.users.models import User\n", + "from cognee.tasks 
import chunk_remove_disconnected, \\\n", + " infer_data_ontology, \\\n", + " save_chunks_to_store, \\\n", + " chunk_update_check, \\\n", + " chunks_into_graph, \\\n", + " source_documents_to_chunks, \\\n", + " check_permissions_on_documents, \\\n", + " classify_documents\n", + "\n", "async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n", " data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n", "\n", @@ -421,8 +341,13 @@ }, "outputs": [], "source": [ + "from cognee.modules.users.methods import get_default_user\n", + "from cognee.modules.data.methods import get_datasets_by_name\n", + "\n", "user = await get_default_user()\n", + "\n", "datasets = await get_datasets_by_name([\"example\"], user.id)\n", + "\n", "await run_cognify_pipeline(datasets[0], user)" ] }, @@ -490,7 +415,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" },