feat: add sqlalchemy as dlt destination (#137)

* feat: add sqlalchemy as dlt destination

* Fix the demo, update Readme

* fix: add 1.5 notebook

---------

Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Boris 2024-09-21 15:58:28 +02:00 committed by GitHub
parent a09f7991e2
commit a9433e9283
56 changed files with 2435 additions and 2554 deletions


@ -18,13 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_neo4j_integration_test:
name: test
needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -66,18 +47,6 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run default Neo4j
env:
ENV: 'dev'
@ -85,14 +54,4 @@ jobs:
GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
GRAPH_DATABASE_USERNAME: "neo4j"
DB_USER: cognee
DB_PASSWORD: cognee
DB_NAME: cognee_db
DB_HOST: localhost
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_neo4j.py


@ -18,15 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_common:
name: test
needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run tests
run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DB_HOST: localhost
DB_USERNAME: cognee
DB_PASSWORD: cognee
DB_DATABASE: cognee_db
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_library.py
- name: Clean up disk space


@ -18,15 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_common:
name: test
needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run tests
run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DB_HOST: localhost
DB_USERNAME: cognee
DB_PASSWORD: cognee
DB_DATABASE: cognee_db
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_library.py
- name: Clean up disk space


@ -18,15 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_common:
name: test
needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
env:
PGUSER: cognee
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run tests
run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DB_HOST: localhost
DB_USERNAME: cognee
DB_PASSWORD: cognee
DB_DATABASE: cognee_db
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_library.py
- name: Clean up disk space


@ -18,13 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_qdrant_integration_test:
name: test
needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -66,32 +47,10 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run default Qdrant
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
DB_USER: cognee
DB_PASSWORD: cognee
DB_NAME: cognee_db
DB_HOST: localhost
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_qdrant.py


@ -18,13 +18,6 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
setup_docker:
name: Set up Docker Buildx
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
run_weaviate_integration_test:
name: test
needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
run:
shell: bash
services:
postgres:
image: postgres:latest
env:
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@master
@ -66,32 +47,10 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
- name: Create .cognee_system directory and print path
run: |
mkdir .cognee_system
echo $(pwd)/.cognee_system
- name: Wait for PostgreSQL to be ready
run: |
echo "Waiting for PostgreSQL to be ready..."
until pg_isready -h localhost -p 5432 -U cognee; do
sleep 1
done
- name: Run default Weaviate
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }}
VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }}
DB_USER: cognee
DB_PASSWORD: cognee
DB_NAME: cognee_db
DB_HOST: localhost
DB_PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
run: poetry run python ./cognee/tests/test_weaviate.py

README.md

@ -18,24 +18,12 @@ We build for developers who need a reliable, production-ready data layer for AI
</a>
</p>
cognee implements scalable, modular data pipelines that allow for creating the LLM-enriched data layer using graph and vector stores.
<p>
<i> cognee aims to be dbt for LLMOps</i>
</p>
## What is cognee?
cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that let you interconnect and retrieve past conversations, documents, and audio transcriptions, while reducing hallucinations, developer effort, and cost.
Try it in a Google Colab <a href="https://colab.research.google.com/drive/1jayZ5JRwDaUGFvCw9UZySBG-iB9gpYfu?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>.
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community.
@ -58,7 +46,7 @@ poetry add cognee
```
## 💻 Usage
## 💻 Basic Usage
### Setup
@ -75,24 +63,6 @@ cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
```
You can use different LLM providers; for more info, check out our <a href="https://topoteretes.github.io/cognee">documentation</a>.
In the next step make sure to launch a Postgres instance. Here is an example from our docker-compose:
```
postgres:
image: postgres:latest
container_name: postgres
environment:
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- 5432:5432
networks:
- cognee-network
```
If you are using NetworkX, create an account on Graphistry to visualize results:
```
@ -106,12 +76,7 @@ docker-compose up cognee
```
Then navigate to localhost:3000/wizard
### Run the default example
Make sure to launch the Postgres instance first. Navigate to the cognee folder and run:
```
docker compose up postgres
```
### Simple example
Run the default cognee pipeline:
@ -123,7 +88,7 @@ text = """Natural language processing (NLP) is an interdisciplinary
await cognee.add([text], "example_dataset") # Add a new piece of information
await cognee.cognify() # Use LLMs and cognee to create knowledge
await cognee.cognify() # Use LLMs and cognee to create a semantic graph
await search_results = cognee.search("SIMILARITY", {'query': 'Tell me about NLP'}) # Query cognee for the knowledge
@ -132,19 +97,20 @@ print(search_results)
```
### Create your pipelines
### Create your own memory store
The cognee framework consists of tasks that can be grouped into pipelines.
Each task can be an independent part of business logic that can be tied to other tasks to form a pipeline.
These tasks persist data into your memory store, enabling you to search for the relevant context of past conversations, documents, or any other data you have stored.
### Example: Classify your documents
The cognee framework consists of tasks that can be grouped into pipelines. Each task can be an independent part of business logic that can be tied to other tasks to form a pipeline.
Here is an example of how it looks for a default cognify pipeline:
1. To prepare the data for the pipeline run, first we need to add it to our metastore and normalize it:
Start with:
```
docker compose up postgres
```
And then run:
Start with:
```
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""
@ -158,90 +124,62 @@ Here we show an example of creating a naive LLM classifier that takes a Pydantic
We provided just a snippet for reference, but feel free to check out the implementation in our repo.
```
async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]):
if len(data_chunks) == 0:
return data_chunks
async def chunk_naive_llm_classifier(
data_chunks: list[DocumentChunk],
classification_model: Type[BaseModel]
):
# Extract classifications asynchronously
chunk_classifications = await asyncio.gather(
*[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
*(extract_categories(chunk.text, classification_model) for chunk in data_chunks)
)
classification_data_points = []
for chunk_index, chunk in enumerate(data_chunks):
chunk_classification = chunk_classifications[chunk_index]
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
for classification_subclass in chunk_classification.label.subclass:
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
# Collect classification data points using a set to avoid duplicates
classification_data_points = {
uuid5(NAMESPACE_OID, cls.label.type)
for cls in chunk_classifications
} | {
uuid5(NAMESPACE_OID, subclass.value)
for cls in chunk_classifications
for subclass in cls.label.subclass
}
vector_engine = get_vector_engine()
collection_name = "classification"
# Define the payload schema
class Keyword(BaseModel):
uuid: str
text: str
chunk_id: str
document_id: str
collection_name = "classification"
if await vector_engine.has_collection(collection_name):
existing_data_points = await vector_engine.retrieve(
collection_name,
list(set(classification_data_points)),
) if len(classification_data_points) > 0 else []
existing_points_map = {point.id: True for point in existing_data_points}
# Ensure the collection exists and retrieve existing data points
if not await vector_engine.has_collection(collection_name):
await vector_engine.create_collection(collection_name, payload_schema=Keyword)
existing_points_map = {}
else:
existing_points_map = {}
await vector_engine.create_collection(collection_name, payload_schema=Keyword)
data_points = []
nodes = []
edges = []
for (chunk_index, data_chunk) in enumerate(data_chunks):
chunk_classification = chunk_classifications[chunk_index]
classification_type_label = chunk_classification.label.type
classification_type_id = uuid5(NAMESPACE_OID, classification_type_label)
return data_chunks
...
```
To see the existing tasks, have a look at the cognee.tasks module.
We have a large number of tasks that can be used in your pipelines, and you can also create your own tasks to fit your business logic.
3. Once we have our tasks, it is time to group them into a pipeline.
This snippet shows how a group of tasks can be added to a pipeline, and how they can pass the information forward from one to another.
This simplified snippet demonstrates how tasks can be added to a pipeline, and how they can pass information forward from one to another.
```
tasks = [
Task(document_to_ontology, root_node_id = root_node_id),
Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
Task(chunk_to_graph_decomposition, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data
Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
Task(
save_chunks_to_store,
collection_name = "chunks",
), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
run_tasks_parallel([
Task(
chunk_extract_summary,
summarization_model = cognee_config.summarization_model,
collection_name = "chunk_summaries",
), # Summarize the document chunks
Task(
chunk_naive_llm_classifier,
classification_model = cognee_config.classification_model,
),
]),
Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
]
pipeline = run_tasks(tasks, documents)
Task(
chunk_naive_llm_classifier,
classification_model = cognee_config.classification_model,
)
pipeline = run_tasks(tasks, documents)
```
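For reference, the `pipeline` object returned by `run_tasks` is an async generator; a minimal sketch of driving it end to end (assuming `tasks` and `documents` are defined as in the snippet above) might look like:

```python
import asyncio

async def main():
    # run_tasks wires the tasks together; iterating the returned async
    # generator executes the pipeline and yields each intermediate result.
    pipeline = run_tasks(tasks, documents)
    async for result in pipeline:
        print(result)

asyncio.run(main())
```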
@ -277,3 +215,23 @@ Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/ma
[![Star History Chart](https://api.star-history.com/svg?repos=topoteretes/cognee&type=Date)](https://star-history.com/#topoteretes/cognee&Date)
## Get Started
### Install Server
Please see the [cognee Quick Start Guide](https://topoteretes.github.io/cognee/quickstart/) for important configuration information.
```bash
docker compose up
```
### Install SDK
Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions.
```bash
pip install cognee
```


@ -14,8 +14,6 @@ from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.infrastructure.databases.relational import create_db_and_tables
# Set up logging
logging.basicConfig(
level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
@ -34,8 +32,12 @@ from contextlib import asynccontextmanager
@asynccontextmanager
async def lifespan(app: FastAPI):
from cognee.infrastructure.databases.relational import create_db_and_tables
from cognee.modules.users.methods import get_default_user
# Not needed if you setup a migration system like Alembic
await create_db_and_tables()
await get_default_user()
yield
app = FastAPI(debug = os.getenv("ENV") != "prod", lifespan = lifespan)
@ -394,10 +396,10 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
try:
logger.info("Starting server at %s:%s", host, port)
import asyncio
from cognee.modules.data.deletion import prune_system, prune_data
asyncio.run(prune_data())
asyncio.run(prune_system(metadata = True))
# import asyncio
# from cognee.modules.data.deletion import prune_system, prune_data
# asyncio.run(prune_data())
# asyncio.run(prune_system(metadata = True))
uvicorn.run(app, host = host, port = port)
except Exception as e:


@ -2,7 +2,6 @@ from typing import List, Union, BinaryIO
from os import path
import asyncio
import dlt
import duckdb
import cognee.modules.ingestion as ingestion
from cognee.infrastructure.files.storage import LocalStorage
@ -81,22 +80,16 @@ async def add_files(file_paths: List[str], dataset_name: str, user: User = None)
relational_config = get_relational_config()
if relational_config.db_provider == "duckdb":
db = duckdb.connect(relational_config.db_file_path)
destination = dlt.destinations.duckdb(
credentials = db,
)
else:
destination = dlt.destinations.postgres(
credentials = {
"host": relational_config.db_host,
"port": relational_config.db_port,
"user": relational_config.db_user,
"password": relational_config.db_password,
"database": relational_config.db_name,
},
)
destination = dlt.destinations.sqlalchemy(
credentials = {
"host": relational_config.db_host,
"port": relational_config.db_port,
"username": relational_config.db_username,
"password": relational_config.db_password,
"database": relational_config.db_name,
"drivername": relational_config.db_provider,
},
)
pipeline = dlt.pipeline(
pipeline_name = "file_load_from_filesystem",
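The change above replaces the duckdb/postgres branch with dlt's generic `sqlalchemy` destination, so one code path serves both SQLite and Postgres. A hedged standalone sketch (assuming dlt ≥ 1.0, where `dlt.destinations.sqlalchemy` also accepts a plain connection string in place of the credentials dict shown above):

```python
import dlt

# Hypothetical minimal pipeline mirroring the wiring above; swap the URL for
# "postgresql://cognee:cognee@localhost:5432/cognee_db" to target Postgres.
destination = dlt.destinations.sqlalchemy(credentials="sqlite:///cognee_db.sqlite")

pipeline = dlt.pipeline(
    pipeline_name="file_load_from_filesystem",
    destination=destination,
    dataset_name="example_dataset",
)

load_info = pipeline.run(
    [{"id": "1", "name": "example.txt", "mime_type": "text/plain"}],
    table_name="file_metadata",
)
print(load_info)
```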


@ -46,72 +46,6 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None):
if type(datasets[0]) == str:
datasets = await get_datasets_by_name(datasets, user.id)
async def run_cognify_pipeline(dataset: Dataset):
data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)
document_ids_str = [str(document.id) for document in data_documents]
dataset_id = dataset.id
dataset_name = generate_dataset_name(dataset.name)
async with update_status_lock:
task_status = await get_pipeline_status([dataset_id])
if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED":
logger.info("Dataset %s is already being processed.", dataset_name)
return
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
try:
cognee_config = get_cognify_config()
root_node_id = None
tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user = user, permissions = ["write"]),
Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),
Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities", task_config = { "batch_size": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
Task(
save_chunks_to_store,
collection_name = "chunks",
), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
run_tasks_parallel([
Task(
chunk_extract_summary,
summarization_model = cognee_config.summarization_model,
collection_name = "chunk_summaries",
), # Summarize the document chunks
Task(
chunk_naive_llm_classifier,
classification_model = cognee_config.classification_model,
),
]),
Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
]
pipeline = run_tasks(tasks, data_documents)
async for result in pipeline:
print(result)
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_FINISHED", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
except Exception as error:
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERROR", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
raise error
existing_datasets_map = {
generate_dataset_name(dataset.name): True for dataset in existing_datasets
}
@ -122,10 +56,76 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None):
dataset_name = generate_dataset_name(dataset.name)
if dataset_name in existing_datasets_map:
awaitables.append(run_cognify_pipeline(dataset))
awaitables.append(run_cognify_pipeline(dataset, user))
return await asyncio.gather(*awaitables)
async def run_cognify_pipeline(dataset: Dataset, user: User):
data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)
document_ids_str = [str(document.id) for document in data_documents]
dataset_id = dataset.id
dataset_name = generate_dataset_name(dataset.name)
async with update_status_lock:
task_status = await get_pipeline_status([dataset_id])
if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED":
logger.info("Dataset %s is already being processed.", dataset_name)
return
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
try:
cognee_config = get_cognify_config()
root_node_id = None
tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user = user, permissions = ["write"]),
Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),
Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities", task_config = { "batch_size": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
Task(
save_chunks_to_store,
collection_name = "chunks",
), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
run_tasks_parallel([
Task(
chunk_extract_summary,
summarization_model = cognee_config.summarization_model,
collection_name = "chunk_summaries",
), # Summarize the document chunks
Task(
chunk_naive_llm_classifier,
classification_model = cognee_config.classification_model,
),
]),
Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
]
pipeline = run_tasks(tasks, data_documents)
async for result in pipeline:
print(result)
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_FINISHED", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
except Exception as error:
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERROR", {
"dataset_name": dataset_name,
"files": document_ids_str,
})
raise error
def generate_dataset_name(dataset_name: str) -> str:
return dataset_name.replace(".", "_").replace(" ", "_")
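The block above is mostly a move: `run_cognify_pipeline` now lives at module level and receives the `user` explicitly instead of closing over it. From the caller's perspective the entry point is unchanged; a sketch based on the README usage (not a verbatim test):

```python
import asyncio
import cognee

async def main():
    text = "Natural language processing (NLP) is an interdisciplinary subfield of computer science."
    await cognee.add([text], "example_dataset")   # ingest into the metastore
    await cognee.cognify(["example_dataset"])     # runs run_cognify_pipeline for the dataset

asyncio.run(main())
```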


@ -1,23 +0,0 @@
from typing import Protocol
class DatabaseEngine(Protocol):
async def ensure_tables(self):
pass
def database_exists(self, db_name: str) -> bool:
pass
def create_database(self, db_name: str):
pass
def drop_database(self, db_name: str):
pass
async def table_exists(self, table_name: str) -> bool:
pass
async def create_tables(self):
pass
async def create(self, data):
pass


@ -1,29 +0,0 @@
import inspect
from typing import Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
class FakeAsyncSession:
def __init__(self, session: Session):
self.session = session
def run_sync(self, *args, **kwargs):
return self.execute(*args, **kwargs)
def __getattr__(self, name: str) -> Any:
"""
If the method being called is async in AsyncSession, create a fake async version
for Session so callers can `await` as usual. Think `commit`, `refresh`,
`delete`, etc.
"""
async_session_attr = getattr(AsyncSession, name, None)
session_attr = getattr(self.session, name)
if not inspect.iscoroutinefunction(async_session_attr):
return session_attr
async def async_wrapper(*args, **kwargs):
return session_attr(*args, **kwargs)
return async_wrapper


@ -1,7 +1,7 @@
from .ModelBase import Base
from .DatabaseEngine import DatabaseEngine
from .sqlite.SqliteEngine import SqliteEngine
from .duckdb.DuckDBAdapter import DuckDBAdapter
from .config import get_relational_config
from .create_db_and_tables import create_db_and_tables
from .get_relational_engine import get_relational_engine
# Global data types
from .data_types.UUID import UUID


@ -1,4 +1,5 @@
import os
from typing import Union
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.root_dir import get_absolute_path
@ -6,13 +7,11 @@ from cognee.root_dir import get_absolute_path
class RelationalConfig(BaseSettings):
db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
db_name: str = "cognee_db"
db_host: str = "localhost"
db_port: str = "5432"
db_user: str = "cognee"
db_password: str = "cognee"
db_provider: str = "postgresql+asyncpg"
db_file_path: str = os.path.join(db_path, db_name)
db_host: Union[str, None] = None # "localhost"
db_port: Union[str, None] = None # "5432"
db_username: Union[str, None] = None # "cognee"
db_password: Union[str, None] = None # "cognee"
db_provider: str = "sqlite"
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -22,7 +21,7 @@ class RelationalConfig(BaseSettings):
"db_name": self.db_name,
"db_host": self.db_host,
"db_port": self.db_port,
"db_user": self.db_user,
"db_username": self.db_username,
"db_password": self.db_password,
"db_provider": self.db_provider,
}
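With these defaults the relational store falls back to SQLite and the Postgres fields stay unset. A hedged sketch of switching providers through the environment read by this pydantic `BaseSettings` class (the variable names are inferred from the field names and the CI workflows earlier in this diff):

```python
import os

# Assumed field-to-env mapping (pydantic-settings matches field names
# case-insensitively); set these before the cached config is first read.
os.environ.update({
    "DB_PROVIDER": "postgres",
    "DB_HOST": "localhost",
    "DB_PORT": "5432",
    "DB_USERNAME": "cognee",
    "DB_PASSWORD": "cognee",
    "DB_NAME": "cognee_db",
})

from cognee.infrastructure.databases.relational import get_relational_config

config = get_relational_config()
print(config.db_provider, config.db_host, config.db_name)
```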


@ -1,9 +1,14 @@
from cognee.infrastructure.files.storage import LocalStorage
from .ModelBase import Base
from .get_relational_engine import get_relational_engine
from .get_relational_engine import get_relational_engine, get_relational_config
async def create_db_and_tables():
relational_config = get_relational_config()
relational_engine = get_relational_engine()
if relational_engine.engine.dialect.name == "sqlite":
LocalStorage.ensure_directory_exists(relational_config.db_path)
async with relational_engine.engine.begin() as connection:
if len(Base.metadata.tables.keys()) > 0:
await connection.run_sync(Base.metadata.create_all)


@ -3,18 +3,16 @@ from .sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
def create_relational_engine(
db_path: str,
db_name: str,
db_provider: str,
db_host: str,
db_port: str,
db_user: str,
db_username: str,
db_password: str,
db_provider: str,
):
return SQLAlchemyAdapter(
db_name = db_name,
db_path = db_path,
db_type = db_provider,
db_host = db_host,
db_port = db_port,
db_user = db_user,
db_password = db_password
)
if db_provider == "sqlite":
connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
if db_provider == "postgres":
connection_string = f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
return SQLAlchemyAdapter(connection_string)


@ -0,0 +1,43 @@
import uuid
from sqlalchemy.types import TypeDecorator, BINARY
from sqlalchemy.dialects.postgresql import UUID as psqlUUID
class UUID(TypeDecorator):
"""Platform-independent GUID type.
Uses Postgresql's UUID type, otherwise uses
BINARY(16), to store UUID.
"""
impl = BINARY
def load_dialect_impl(self, dialect):
if dialect.name == 'postgresql':
return dialect.type_descriptor(psqlUUID())
else:
return dialect.type_descriptor(BINARY(16))
def process_bind_param(self, value, dialect):
if value is None:
return value
else:
if not isinstance(value, uuid.UUID):
if isinstance(value, bytes):
value = uuid.UUID(bytes = value)
elif isinstance(value, int):
value = uuid.UUID(int = value)
elif isinstance(value, str):
value = uuid.UUID(value)
if dialect.name == 'postgresql':
return str(value)
else:
return value.bytes
def process_result_value(self, value, dialect):
if value is None:
return value
if dialect.name == 'postgresql':
return uuid.UUID(value)
else:
return uuid.UUID(bytes = value)
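This new cross-dialect `UUID` type is what the model changes below switch to. A minimal sketch of a model using it (the `Example` class is hypothetical, but follows the `id = Column(UUID, primary_key = True, default = uuid4)` pattern in the models that follow):

```python
from uuid import uuid4
from sqlalchemy import Column, String
from cognee.infrastructure.databases.relational import Base, UUID

class Example(Base):
    __tablename__ = "examples"

    # Native UUID on Postgres, BINARY(16) on SQLite and other dialects.
    id = Column(UUID, primary_key=True, default=uuid4)
    name = Column(String)
```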


@ -1,169 +0,0 @@
import duckdb
import os
class DuckDBAdapter():
def __init__(self, db_path: str, db_name: str):
self.db_location = os.path.abspath(os.path.join(db_path, db_name))
self.get_connection = lambda: duckdb.connect(self.db_location)
def get_datasets(self):
with self.get_connection() as connection:
tables = connection.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict("list")
return list(
filter(
lambda schema_name: not schema_name.endswith("staging") and schema_name != "cognee",
tables["schema_name"]
)
)
def get_files_metadata(self, dataset_name: str):
with self.get_connection() as connection:
return connection.sql(f"SELECT id, name, file_path, extension, mime_type FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
def create_table(self, schema_name: str, table_name: str, table_config: list[dict]):
fields_query_parts = []
for table_config_item in table_config:
fields_query_parts.append(f"{table_config_item['name']} {table_config_item['type']}")
with self.get_connection() as connection:
query = f"CREATE SCHEMA IF NOT EXISTS {schema_name};"
connection.execute(query)
with self.get_connection() as connection:
query = f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({', '.join(fields_query_parts)});"
connection.execute(query)
def delete_table(self, table_name: str):
with self.get_connection() as connection:
query = f"DROP TABLE IF EXISTS {table_name};"
connection.execute(query)
def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
def get_values(data_entry: list):
return ", ".join([f"'{value}'" if isinstance(value, str) else value for value in data_entry])
columns = ", ".join(data[0].keys())
values = ", ".join([f"({get_values(data_entry.values())})" for data_entry in data])
with self.get_connection() as connection:
query = f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES {values};"
connection.execute(query)
def get_data(self, table_name: str, filters: dict = None):
with self.get_connection() as connection:
def get_values(values: list):
return ", ".join([f"'{value}'" for value in values])
def get_filters(filters: dict):
return " AND ".join([
f"{key} IN ({get_values(value)})" if isinstance(value, list)
else f"{key} = '{value}'" for (key, value) in filters.items()
])
query = f"SELECT * FROM {table_name}" + (";" if filters is None else f" WHERE {get_filters(filters)};")
results = connection.sql(query).to_df().to_dict("records")
return {
result["data_id"]: result["status"] for result in results
}
def execute_query(self, query):
with self.get_connection() as connection:
return connection.sql(query).to_df().to_dict("records")
def load_cognify_data(self, data):
with self.get_connection() as connection:
# Ensure the "cognify" table exists
connection.execute("""
CREATE TABLE IF NOT EXISTS cognify (
document_id STRING,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT NULL,
processed BOOLEAN DEFAULT FALSE,
document_id_target STRING NULL
);
""")
# Prepare the insert statement
insert_query = """
INSERT INTO cognify (document_id)
VALUES (?);
"""
# Insert each record into the "cognify" table
for record in data:
with self.get_connection() as connection:
connection.execute(insert_query, [
record.get("document_id"),
])
def fetch_cognify_data(self, excluded_document_id: str):
# SQL command to create the "cognify" table with the specified columns
create_table_sql = """
CREATE TABLE IF NOT EXISTS cognify (
document_id STRING,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT NULL,
processed BOOLEAN DEFAULT FALSE,
document_id_target STRING NULL
);
"""
with self.get_connection() as connection:
# Execute the SQL command to create the table
connection.execute(create_table_sql)
# SQL command to select data from the "cognify" table
select_data_sql = f"SELECT document_id, created_at, updated_at, processed FROM cognify WHERE document_id != '{excluded_document_id}' AND processed = FALSE;"
with self.get_connection() as connection:
# Execute the query and fetch the results
records = connection.sql(select_data_sql).to_df().to_dict("records")
# If records are fetched, update the "processed" column to "True"
if records:
# Fetching document_ids from the records to update the "processed" column
document_ids = tuple(record["document_id"] for record in records)
# SQL command to update the "processed" column to "True" for fetched records
update_data_sql = f"UPDATE cognify SET processed = TRUE WHERE document_id IN {document_ids};"
with self.get_connection() as connection:
# Execute the update query
connection.execute(update_data_sql)
# Return the fetched records
return records
def delete_cognify_data(self):
# SQL command to create the "cognify" table with the specified columns
create_table_sql = """
CREATE TABLE IF NOT EXISTS cognify (
document_id STRING,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT NULL,
processed BOOLEAN DEFAULT FALSE,
document_id_target STRING NULL
);
"""
with self.get_connection() as connection:
# Execute the SQL command to create the table
connection.execute(create_table_sql)
with self.get_connection() as connection:
# SQL command to select data from the "cognify" table
select_data_sql = "DELETE FROM cognify;"
connection.sql(select_data_sql)
drop_data_sql = "DROP TABLE cognify;"
connection.sql(drop_data_sql)
def delete_database(self):
from cognee.infrastructure.files.storage import LocalStorage
if LocalStorage.file_exists(self.db_location):
LocalStorage.remove(self.db_location)
if LocalStorage.file_exists(self.db_location + ".wal"):
LocalStorage.remove(self.db_location + ".wal")


@ -1,26 +0,0 @@
from abc import abstractmethod
from typing import Protocol, TypeVar, Type, List
RowDataType = TypeVar('RowDataType')
class RelationalDBInterface(Protocol):
@abstractmethod
async def create_database(self, database_name: str, database_path: str): raise NotImplementedError
@abstractmethod
async def create_table(self, table_name: str, table_config: object): raise NotImplementedError
@abstractmethod
async def add_row(self, table_name: str, row_data: Type[RowDataType]): raise NotImplementedError
@abstractmethod
async def add_rows(self, table_name: str, rows_data: List[Type[RowDataType]]): raise NotImplementedError
@abstractmethod
async def get_row(self, table_name: str, row_id: str): raise NotImplementedError
@abstractmethod
async def update_row(self, table_name: str, row_id: str, row_data: Type[RowDataType]): raise NotImplementedError
@abstractmethod
async def delete_row(self, table_name: str, row_id: str): raise NotImplementedError


@ -1,39 +1,18 @@
import os
import asyncio
from typing import AsyncGenerator
from contextlib import asynccontextmanager
from sqlalchemy import create_engine, text, select
from sqlalchemy.orm import sessionmaker, joinedload
from sqlalchemy import text, select
from sqlalchemy.orm import joinedload
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from cognee.infrastructure.files.storage import LocalStorage
from cognee.infrastructure.databases.relational.FakeAsyncSession import FakeAsyncSession
from ..ModelBase import Base
def make_async_sessionmaker(sessionmaker):
@asynccontextmanager
async def async_session_maker():
await asyncio.sleep(0.1)
session = FakeAsyncSession(sessionmaker())
try:
yield session
finally:
await session.close() # Ensure the session is closed
return async_session_maker
class SQLAlchemyAdapter():
def __init__(self, db_type: str, db_path: str, db_name: str, db_user: str, db_password: str, db_host: str, db_port: str):
self.db_location = os.path.abspath(os.path.join(db_path, db_name))
self.db_name = db_name
def __init__(self, connection_string: str):
self.engine = create_async_engine(connection_string)
self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
if db_type == "duckdb":
LocalStorage.ensure_directory_exists(db_path)
self.engine = create_engine(f"duckdb:///{self.db_location}")
self.sessionmaker = make_async_sessionmaker(sessionmaker(bind=self.engine))
else:
self.engine = create_async_engine(f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}")
self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
if self.engine.dialect.name == "sqlite":
self.db_path = connection_string.split("///")[1]
@asynccontextmanager
async def get_async_session(self) -> AsyncGenerator[AsyncSession, None]:
@ -72,6 +51,7 @@ class SQLAlchemyAdapter():
await connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;"))
await connection.close()
async def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
columns = ", ".join(data[0].keys())
values = ", ".join([f"({', '.join([f':{key}' for key in row.keys()])})" for row in data])
@ -80,6 +60,7 @@ class SQLAlchemyAdapter():
async with self.engine.begin() as connection:
await connection.execute(insert_query, data)
await connection.close()
async def get_data(self, table_name: str, filters: dict = None):
async with self.engine.begin() as connection:
query = f"SELECT * FROM {table_name}"
@ -113,11 +94,19 @@ class SQLAlchemyAdapter():
print(f"Error dropping database tables: {e}")
async def delete_database(self):
async with self.engine.begin() as connection:
try:
for table in Base.metadata.sorted_tables:
drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE')
await connection.execute(drop_table_query)
print("Database deleted successfully.")
except Exception as e:
print(f"Error deleting database: {e}")
try:
if self.engine.dialect.name == "sqlite":
from cognee.infrastructure.files.storage import LocalStorage
LocalStorage.remove(self.db_path)
self.db_path = None
else:
async with self.engine.begin() as connection:
for table in Base.metadata.sorted_tables:
drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE')
await connection.execute(drop_table_query)
except Exception as e:
print(f"Error deleting database: {e}")
print("Database deleted successfully.")


@ -1,82 +0,0 @@
import os
import asyncio
from typing import Callable
from sqlalchemy.inspection import inspect
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncEngine, AsyncSession, async_scoped_session
from sqlalchemy.future import select
from cognee.infrastructure.files.storage.LocalStorage import LocalStorage
from ..DatabaseEngine import DatabaseEngine
from ..ModelBase import Base
from ..utils import with_rollback
class SqliteEngine(DatabaseEngine):
db_path: str = None
db_name: str = None
engine: AsyncEngine = None
session_maker: Callable[[], async_scoped_session[AsyncSession]] = None
is_db_done: bool = False
def __init__(self, db_path: str, db_name: str):
self.db_path = db_path
self.db_name = db_name
self.db_location = db_path + "/" + db_name
self.engine = create_async_engine(
f"sqlite+aiosqlite:///{self.db_location}",
pool_recycle = 3600,
echo = False
)
self.session_maker = lambda: async_scoped_session(
async_sessionmaker(
bind = self.engine,
class_ = AsyncSession
),
scopefunc = asyncio.current_task
)
async def ensure_tables(self):
if not self.database_exists(self.db_name):
self.create_database(self.db_name)
await self.create_tables()
self.is_db_done = True
return True
def database_exists(self, db_name: str) -> bool:
return os.path.exists(self.db_path + "/" + db_name)
def create_database(self, db_name: str):
LocalStorage.ensure_directory_exists(self.db_path)
with open(self.db_path + "/" + db_name, mode = "w+", encoding = "utf-8") as file:
file.write("")
def drop_database(self, db_name: str):
os.remove(self.db_location)
async def table_exists(self, table_name: str) -> bool:
return inspect(self.engine).has_table(table_name)
async def create_tables(self):
async with self.engine.begin() as connection:
return await connection.run_sync(Base.metadata.create_all)
async def create(self, data):
async with with_rollback(self.session_maker()) as session:
session.add(data)
async def query(self, query_term):
async with with_rollback(self.session_maker()) as session:
return await session.execute(query_term)
async def query_entity(self, entity):
async with with_rollback(self.session_maker()) as session:
return await session.execute(
select(type(entity))
.where(type(entity).id == entity.id)
)
async def update(self, data_update_fn):
async with with_rollback(self.session_maker()):
data_update_fn()


@ -1 +0,0 @@
from .with_rollback import with_rollback


@ -1,18 +0,0 @@
import logging
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import async_scoped_session
logger = logging.getLogger(__name__)
@asynccontextmanager
async def with_rollback(session: async_scoped_session):
"""Provide a transactional scope around a series of operations."""
try:
# async with session.begin():
yield session
await session.commit()
await session.remove()
except Exception as exception:
await session.rollback()
logger.error("Session rolled back due to: %s", str(exception))
raise exception


@ -4,7 +4,7 @@ import litellm
from litellm import aembedding
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
litellm.set_verbose = True
litellm.set_verbose = False
class LiteLLMEmbeddingEngine(EmbeddingEngine):
api_key: str


@ -106,11 +106,10 @@ class QDrantAdapter(VectorDBInterface):
points = [convert_to_qdrant_point(point) for point in data_points]
try:
result = await client.upload_points(
client.upload_points(
collection_name = collection_name,
points = points
)
return result
except Exception as error:
logger.error("Error uploading data points to Qdrant: %s", str(error))
raise error


@ -1,7 +1,7 @@
from datetime import datetime
from sqlalchemy.orm import Mapped, MappedColumn
from sqlalchemy import Column, String, DateTime, ForeignKey, Enum, UUID, JSON
from cognee.infrastructure.databases.relational import ModelBase
from sqlalchemy import Column, DateTime, ForeignKey, Enum, JSON
from cognee.infrastructure.databases.relational import Base, UUID
class OperationType(Enum):
MERGE_DATA = "MERGE_DATA"
@ -14,10 +14,10 @@ class OperationStatus(Enum):
ERROR = "OPERATION_ERROR"
CANCELLED = "OPERATION_CANCELLED"
class Operation(ModelBase):
class Operation(Base):
__tablename__ = "operation"
id = Column(String, primary_key = True)
id = Column(UUID, primary_key = True)
status = Column(Enum(OperationStatus))
operation_type = Column(Enum(OperationType))


@ -7,6 +7,8 @@ async def get_datasets_by_name(dataset_names: list[str], user_id: UUID) -> list[
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
if isinstance(dataset_names, str):
dataset_names = [dataset_names]
datasets = (await session.scalars(
select(Dataset)
.filter(Dataset.owner_id == user_id)


@ -2,14 +2,14 @@ from uuid import uuid4
from typing import List
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, String, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, String, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
from .DatasetData import DatasetData
class Data(Base):
__tablename__ = "data"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
name = Column(String)
extension = Column(String)


@ -2,14 +2,14 @@ from uuid import uuid4
from typing import List
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, Text, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, Text, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
from .DatasetData import DatasetData
class Dataset(Base):
__tablename__ = "datasets"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
name = Column(Text)

View file

@ -1,11 +1,11 @@
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, UUID, ForeignKey
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime, ForeignKey
from cognee.infrastructure.databases.relational import Base, UUID
class DatasetData(Base):
__tablename__ = "dataset_data"
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
dataset_id = Column(UUID(as_uuid = True), ForeignKey("datasets.id"), primary_key = True)
data_id = Column(UUID(as_uuid = True), ForeignKey("data.id"), primary_key = True)
dataset_id = Column(UUID, ForeignKey("datasets.id"), primary_key = True)
data_id = Column(UUID, ForeignKey("data.id"), primary_key = True)


@ -6,18 +6,18 @@ from .Document import Document
class AudioDocument(Document):
type: str = "audio"
title: str
file_path: str
chunking_strategy:str
raw_data_location: str
chunking_strategy: str
def __init__(self, id: UUID, title: str, file_path: str, chunking_strategy:str="paragraph"):
def __init__(self, id: UUID, title: str, raw_data_location: str, chunking_strategy:str="paragraph"):
self.id = id or uuid5(NAMESPACE_OID, title)
self.title = title
self.file_path = file_path
self.raw_data_location = raw_data_location
self.chunking_strategy = chunking_strategy
def read(self):
# Transcribe the audio file
result = get_llm_client().create_transcript(self.file_path)
result = get_llm_client().create_transcript(self.raw_data_location)
text = result.text
chunker = TextChunker(self.id, get_text = lambda: text)
@ -30,5 +30,5 @@ class AudioDocument(Document):
id=str(self.id),
type=self.type,
title=self.title,
file_path=self.file_path,
raw_data_location=self.raw_data_location,
)


@ -5,7 +5,7 @@ class Document(Protocol):
id: UUID
type: str
title: str
file_path: str
raw_data_location: str
def read(self) -> str:
pass


@ -7,16 +7,16 @@ from .Document import Document
class ImageDocument(Document):
type: str = "image"
title: str
file_path: str
raw_data_location: str
def __init__(self, id: UUID, title: str, file_path: str):
def __init__(self, id: UUID, title: str, raw_data_location: str):
self.id = id or uuid5(NAMESPACE_OID, title)
self.title = title
self.file_path = file_path
self.raw_data_location = raw_data_location
def read(self):
# Transcribe the image file
result = get_llm_client().transcribe_image(self.file_path)
result = get_llm_client().transcribe_image(self.raw_data_location)
text = result.choices[0].message.content
chunker = TextChunker(self.id, get_text = lambda: text)
@ -29,5 +29,5 @@ class ImageDocument(Document):
id=str(self.id),
type=self.type,
title=self.title,
file_path=self.file_path,
raw_data_location=self.raw_data_location,
)


@ -6,15 +6,15 @@ from .Document import Document
class PdfDocument(Document):
type: str = "pdf"
title: str
file_path: str
raw_data_location: str
def __init__(self, id: UUID, title: str, file_path: str):
def __init__(self, id: UUID, title: str, raw_data_location: str):
self.id = id or uuid5(NAMESPACE_OID, title)
self.title = title
self.file_path = file_path
self.raw_data_location = raw_data_location
def read(self) -> PdfReader:
file = PdfReader(self.file_path)
file = PdfReader(self.raw_data_location)
def get_text():
for page in file.pages:
@ -32,5 +32,5 @@ class PdfDocument(Document):
id = str(self.id),
type = self.type,
title = self.title,
file_path = self.file_path,
raw_data_location = self.raw_data_location,
)


@ -5,16 +5,16 @@ from .Document import Document
class TextDocument(Document):
type: str = "text"
title: str
file_path: str
raw_data_location: str
def __init__(self, id: UUID, title: str, file_path: str):
def __init__(self, id: UUID, title: str, raw_data_location: str):
self.id = id or uuid5(NAMESPACE_OID, title)
self.title = title
self.file_path = file_path
self.raw_data_location = raw_data_location
def read(self):
def get_text():
with open(self.file_path, mode = "r", encoding = "utf-8") as file:
with open(self.raw_data_location, mode = "r", encoding = "utf-8") as file:
while True:
text = file.read(1024)
@ -34,5 +34,5 @@ class TextDocument(Document):
id = str(self.id),
type = self.type,
title = self.title,
file_path = self.file_path,
raw_data_location = self.raw_data_location,
)


@ -1,14 +1,14 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, UUID, DateTime, String, Text
from sqlalchemy import Column, DateTime, String, Text
from sqlalchemy.orm import relationship, Mapped
from cognee.infrastructure.databases.relational import Base
from cognee.infrastructure.databases.relational import Base, UUID
from .PipelineTask import PipelineTask
class Pipeline(Base):
__tablename__ = "pipelines"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
name = Column(String)
description = Column(Text, nullable = True)


@ -1,16 +1,16 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, UUID, DateTime, String, JSON
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime, String, JSON
from cognee.infrastructure.databases.relational import Base, UUID
class PipelineRun(Base):
__tablename__ = "pipeline_runs"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
status = Column(String)
run_id = Column(UUID(as_uuid = True), index = True)
run_id = Column(UUID, index = True)
run_info = Column(JSON)


@ -1,11 +1,11 @@
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, UUID, ForeignKey
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime, ForeignKey
from cognee.infrastructure.databases.relational import Base, UUID
class PipelineTask(Base):
__tablename__ = "pipeline_task"
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
pipeline_id = Column("pipeline", UUID(as_uuid = True), ForeignKey("pipeline.id"), primary_key = True)
task_id = Column("task", UUID(as_uuid = True), ForeignKey("task.id"), primary_key = True)
pipeline_id = Column("pipeline", UUID, ForeignKey("pipeline.id"), primary_key = True)
task_id = Column("task", UUID, ForeignKey("task.id"), primary_key = True)

View file

@ -1,14 +1,14 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, String, DateTime, UUID, Text
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, String, DateTime, Text
from cognee.infrastructure.databases.relational import Base, UUID
from .PipelineTask import PipelineTask
class Task(Base):
__tablename__ = "tasks"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
name = Column(String)
description = Column(Text, nullable = True)


@ -1,12 +1,12 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, UUID, DateTime, String, JSON
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime, String, JSON
from cognee.infrastructure.databases.relational import Base, UUID
class TaskRun(Base):
__tablename__ = "task_runs"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
task_name = Column(String)


@ -1,20 +1,20 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, ForeignKey, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, ForeignKey, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
from .ACLResources import ACLResources
class ACL(Base):
__tablename__ = "acls"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
principal_id = Column(UUID(as_uuid = True), ForeignKey("principals.id"))
permission_id = Column(UUID(as_uuid = True), ForeignKey("permissions.id"))
principal_id = Column(UUID, ForeignKey("principals.id"))
permission_id = Column(UUID, ForeignKey("permissions.id"))
principal = relationship("Principal")
permission = relationship("Permission")


@ -1,11 +1,11 @@
from datetime import datetime, timezone
from sqlalchemy import Column, ForeignKey, UUID, DateTime
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, ForeignKey, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
class ACLResources(Base):
__tablename__ = "acl_resources"
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
acl_id = Column(UUID(as_uuid = True), ForeignKey("acls.id"), primary_key = True)
resource_id = Column(UUID(as_uuid = True), ForeignKey("resources.id"), primary_key = True)
acl_id = Column(UUID, ForeignKey("acls.id"), primary_key = True)
resource_id = Column(UUID, ForeignKey("resources.id"), primary_key = True)

View file

@@ -1,12 +1,13 @@
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, String, ForeignKey, UUID
from sqlalchemy import Column, String, ForeignKey
from cognee.infrastructure.databases.relational import UUID
from .Principal import Principal
from .UserGroup import UserGroup
class Group(Principal):
__tablename__ = "groups"
id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True)
id = Column(UUID, ForeignKey("principals.id"), primary_key = True)
name = Column(String, unique = True, nullable = False, index = True)

View file

@@ -1,8 +1,8 @@
from uuid import uuid4
from datetime import datetime, timezone
# from sqlalchemy.orm import relationship
from sqlalchemy import Column, DateTime, UUID, String
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime, String
from cognee.infrastructure.databases.relational import Base, UUID
class Permission(Base):
__tablename__ = "permissions"

View file

@@ -1,12 +1,12 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, String, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, String, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
class Principal(Base):
__tablename__ = "principals"
id = Column(UUID(as_uuid = True), primary_key = True, index = True, default = uuid4)
id = Column(UUID, primary_key = True, index = True, default = uuid4)
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))

View file

@@ -1,18 +1,18 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy.orm import relationship
from sqlalchemy import Column, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
from .ACLResources import ACLResources
class Resource(Base):
__tablename__ = "resources"
id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
id = Column(UUID, primary_key = True, default = uuid4)
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
resource_id = Column(UUID(as_uuid = True), nullable = False)
resource_id = Column(UUID, nullable = False)
acls = relationship("ACL", secondary = ACLResources.__tablename__, back_populates = "resources")

View file

@@ -1,14 +1,15 @@
from uuid import UUID as uuid_UUID
from sqlalchemy import ForeignKey, UUID, Column
from sqlalchemy import ForeignKey, Column
from sqlalchemy.orm import relationship, Mapped
from fastapi_users.db import SQLAlchemyBaseUserTableUUID
from cognee.infrastructure.databases.relational import UUID
from .Principal import Principal
from .UserGroup import UserGroup
class User(SQLAlchemyBaseUserTableUUID, Principal):
__tablename__ = "users"
id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True)
id = Column(UUID, ForeignKey("principals.id"), primary_key = True)
groups: Mapped[list["Group"]] = relationship(
secondary = UserGroup.__tablename__,

View file

@@ -1,11 +1,11 @@
from datetime import datetime, timezone
from sqlalchemy import Column, ForeignKey, DateTime, UUID
from cognee.infrastructure.databases.relational import Base
from sqlalchemy import Column, ForeignKey, DateTime
from cognee.infrastructure.databases.relational import Base, UUID
class UserGroup(Base):
__tablename__ = "user_groups"
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
user_id = Column(UUID(as_uuid = True), ForeignKey("users.id"), primary_key = True)
group_id = Column(UUID(as_uuid = True), ForeignKey("groups.id"), primary_key = True)
user_id = Column(UUID, ForeignKey("users.id"), primary_key = True)
group_id = Column(UUID, ForeignKey("groups.id"), primary_key = True)

View file

@@ -24,7 +24,7 @@ def chunk_by_word(data: str):
while next_character is not None and (re.match(paragraph_endings, next_character) or next_character == " "):
j += 1
next_character = data[j] if j < len(data) else None
if next_character.isupper():
if next_character and next_character.isupper():
return True
return False
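
The added "next_character and" guard covers the case where the lookahead walks past the end of the input and next_character becomes None; calling .isupper() on None raised an AttributeError before this fix. A standalone illustration (not the module's code):

# Minimal reproduction of the guarded lookahead at the end of the input.
data = "End of text. "
j = len(data)                                    # lookahead already past the last character
next_character = data[j] if j < len(data) else None

if next_character and next_character.isupper():  # guarded, as in the fix above
    print("sentence boundary")
else:
    print("no boundary")                         # reached instead of crashing on None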

View file

@@ -3,10 +3,10 @@ from cognee.modules.data.processing.document_types import Document, PdfDocument,
def classify_documents(data_documents: list[Data]) -> list[Document]:
documents = [
PdfDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "pdf" else
AudioDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "audio" else
ImageDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "image" else
TextDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location)
PdfDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
AudioDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
ImageDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
TextDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
for data_item in data_documents
]
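
Besides the keyword rename from file_path to raw_data_location (matching the field on the Data record it is copied from), the chained conditional above is an extension-to-class dispatch; an illustrative rewrite as a mapping, assuming the document classes imported at the top of the module:

from cognee.modules.data.processing.document_types import (
    PdfDocument, AudioDocument, ImageDocument, TextDocument,
)

# Hypothetical rewrite for readability; not the committed implementation.
DOCUMENT_CLASSES = {
    "pdf": PdfDocument,
    "audio": AudioDocument,
    "image": ImageDocument,
}

def classify(data_item):
    document_class = DOCUMENT_CLASSES.get(data_item.extension, TextDocument)
    return document_class(
        id = data_item.id,
        title = f"{data_item.name}.{data_item.extension}",
        raw_data_location = data_item.raw_data_location,
    )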

View file

@@ -89,7 +89,7 @@ class OntologyEngine:
chunk_strategy = chunk_config.chunk_strategy
for base_file in documents:
with open(base_file.file_path, "rb") as file:
with open(base_file.raw_data_location, "rb") as file:
try:
file_type = guess_file_type(file)
text = extract_text_from_file(file, file_type)
@@ -175,7 +175,7 @@ async def infer_data_ontology(documents, ontology_model = KnowledgeGraph, root_n
ontology_engine = OntologyEngine()
root_node_id = await ontology_engine.add_graph_ontology(documents = documents)
else:
graph_engine = get_graph_engine()
graph_engine = await get_graph_engine()
await add_model_class_to_graph(ontology_model, graph_engine)
yield (documents, root_node_id)
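
The second hunk adds the missing await: get_graph_engine is a coroutine function, so calling it bare hands a coroutine object rather than an engine to add_model_class_to_graph. A standalone illustration with a stand-in async factory:

import asyncio

async def get_graph_engine():              # stand-in for the real async factory
    return "graph-engine"

async def main():
    broken = get_graph_engine()            # a coroutine object, not an engine
    working = await get_graph_engine()     # the awaited result
    print(type(broken).__name__, working)  # "coroutine graph-engine"
    broken.close()                         # close the never-awaited coroutine

asyncio.run(main())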

View file

@@ -0,0 +1,512 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "958375a6ffc0c2e4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:47.336283Z",
"start_time": "2024-09-20T14:02:43.652444Z"
}
},
"outputs": [],
"source": [
"import asyncio\n",
"import logging\n",
"from typing import Union\n",
"\n",
"from cognee.modules.cognify.config import get_cognify_config\n",
"from cognee.shared.data_models import KnowledgeGraph\n",
"from cognee.modules.data.models import Dataset, Data\n",
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
"from cognee.modules.data.methods import get_datasets, get_datasets_by_name\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"from cognee.modules.pipelines import run_tasks, run_tasks_parallel\n",
"from cognee.modules.users.models import User\n",
"from cognee.modules.users.methods import get_default_user\n",
"from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status\n",
"from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status\n",
"from cognee.tasks import chunk_extract_summary, \\\n",
" chunk_naive_llm_classifier, \\\n",
" chunk_remove_disconnected, \\\n",
" infer_data_ontology, \\\n",
" save_chunks_to_store, \\\n",
" chunk_update_check, \\\n",
" chunks_into_graph, \\\n",
" source_documents_to_chunks, \\\n",
" check_permissions_on_documents, \\\n",
" classify_documents"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "df16431d0f48b006",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:48.519686Z",
"start_time": "2024-09-20T14:02:48.515589Z"
}
},
"outputs": [],
"source": [
"job_position = \"\"\"Senior Data Scientist (Machine Learning)\n",
"\n",
"Company: TechNova Solutions\n",
"Location: San Francisco, CA\n",
"\n",
"Job Description:\n",
"\n",
"TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.\n",
"\n",
"Responsibilities:\n",
"\n",
"Develop and implement advanced machine learning algorithms and models.\n",
"Analyze large, complex datasets to extract meaningful patterns and insights.\n",
"Collaborate with cross-functional teams to integrate predictive models into products.\n",
"Stay updated with the latest advancements in machine learning and data science.\n",
"Mentor junior data scientists and provide technical guidance.\n",
"Qualifications:\n",
"\n",
"Masters or Ph.D. in Data Science, Computer Science, Statistics, or a related field.\n",
"5+ years of experience in data science and machine learning.\n",
"Proficient in Python, R, and SQL.\n",
"Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).\n",
"Strong problem-solving skills and attention to detail.\n",
"Candidate CVs\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9086abf3af077ab4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.120838Z",
"start_time": "2024-09-20T14:02:49.118294Z"
}
},
"outputs": [],
"source": [
"job_1 = \"\"\"\n",
"CV 1: Relevant\n",
"Name: Dr. Emily Carter\n",
"Contact Information:\n",
"\n",
"Email: emily.carter@example.com\n",
"Phone: (555) 123-4567\n",
"Summary:\n",
"\n",
"Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.\n",
"\n",
"Education:\n",
"\n",
"Ph.D. in Computer Science, Stanford University (2014)\n",
"B.S. in Mathematics, University of California, Berkeley (2010)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, InnovateAI Labs (2016 Present)\n",
"Led a team in developing machine learning models for natural language processing applications.\n",
"Implemented deep learning algorithms that improved prediction accuracy by 25%.\n",
"Collaborated with cross-functional teams to integrate models into cloud-based platforms.\n",
"Data Scientist, DataWave Analytics (2014 2016)\n",
"Developed predictive models for customer segmentation and churn analysis.\n",
"Analyzed large datasets using Hadoop and Spark frameworks.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R, SQL\n",
"Machine Learning: TensorFlow, Keras, Scikit-Learn\n",
"Big Data Technologies: Hadoop, Spark\n",
"Data Visualization: Tableau, Matplotlib\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a9de0cc07f798b7f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:49.675003Z",
"start_time": "2024-09-20T14:02:49.671615Z"
}
},
"outputs": [],
"source": [
"job_2 = \"\"\"\n",
"CV 2: Relevant\n",
"Name: Michael Rodriguez\n",
"Contact Information:\n",
"\n",
"Email: michael.rodriguez@example.com\n",
"Phone: (555) 234-5678\n",
"Summary:\n",
"\n",
"Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Data Science, Carnegie Mellon University (2013)\n",
"B.S. in Computer Science, University of Michigan (2011)\n",
"Experience:\n",
"\n",
"Senior Data Scientist, Alpha Analytics (2017 Present)\n",
"Developed machine learning models to optimize marketing strategies.\n",
"Reduced customer acquisition cost by 15% through predictive modeling.\n",
"Data Scientist, TechInsights (2013 2017)\n",
"Analyzed user behavior data to improve product features.\n",
"Implemented A/B testing frameworks to evaluate product changes.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, Java, SQL\n",
"Machine Learning: Scikit-Learn, XGBoost\n",
"Data Visualization: Seaborn, Plotly\n",
"Databases: MySQL, MongoDB\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "185ff1c102d06111",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.286828Z",
"start_time": "2024-09-20T14:02:50.284369Z"
}
},
"outputs": [],
"source": [
"job_3 = \"\"\"\n",
"CV 3: Relevant\n",
"Name: Sarah Nguyen\n",
"Contact Information:\n",
"\n",
"Email: sarah.nguyen@example.com\n",
"Phone: (555) 345-6789\n",
"Summary:\n",
"\n",
"Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.\n",
"\n",
"Education:\n",
"\n",
"M.S. in Statistics, University of Washington (2014)\n",
"B.S. in Applied Mathematics, University of Texas at Austin (2012)\n",
"Experience:\n",
"\n",
"Data Scientist, QuantumTech (2016 Present)\n",
"Designed and implemented machine learning algorithms for financial forecasting.\n",
"Improved model efficiency by 20% through algorithm optimization.\n",
"Junior Data Scientist, DataCore Solutions (2014 2016)\n",
"Assisted in developing predictive models for supply chain optimization.\n",
"Conducted data cleaning and preprocessing on large datasets.\n",
"Skills:\n",
"\n",
"Programming Languages: Python, R\n",
"Machine Learning Frameworks: PyTorch, Scikit-Learn\n",
"Statistical Analysis: SAS, SPSS\n",
"Cloud Platforms: AWS, Azure\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d55ce4c58f8efb67",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:50.950343Z",
"start_time": "2024-09-20T14:02:50.946378Z"
}
},
"outputs": [],
"source": [
"job_4 = \"\"\"\n",
"CV 4: Not Relevant\n",
"Name: David Thompson\n",
"Contact Information:\n",
"\n",
"Email: david.thompson@example.com\n",
"Phone: (555) 456-7890\n",
"Summary:\n",
"\n",
"Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.\n",
"\n",
"Education:\n",
"\n",
"B.F.A. in Graphic Design, Rhode Island School of Design (2012)\n",
"Experience:\n",
"\n",
"Senior Graphic Designer, CreativeWorks Agency (2015 Present)\n",
"Led design projects for clients in various industries.\n",
"Created branding materials that increased client engagement by 30%.\n",
"Graphic Designer, Visual Innovations (2012 2015)\n",
"Designed marketing collateral, including brochures, logos, and websites.\n",
"Collaborated with the marketing team to develop cohesive brand strategies.\n",
"Skills:\n",
"\n",
"Design Software: Adobe Photoshop, Illustrator, InDesign\n",
"Web Design: HTML, CSS\n",
"Specialties: Branding and Identity, Typography\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ca4ecc32721ad332",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:51.548191Z",
"start_time": "2024-09-20T14:02:51.545520Z"
}
},
"outputs": [],
"source": [
"job_5 = \"\"\"\n",
"CV 5: Not Relevant\n",
"Name: Jessica Miller\n",
"Contact Information:\n",
"\n",
"Email: jessica.miller@example.com\n",
"Phone: (555) 567-8901\n",
"Summary:\n",
"\n",
"Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.\n",
"\n",
"Education:\n",
"\n",
"B.A. in Business Administration, University of Southern California (2010)\n",
"Experience:\n",
"\n",
"Sales Manager, Global Enterprises (2015 Present)\n",
"Managed a sales team of 15 members, achieving a 20% increase in annual revenue.\n",
"Developed sales strategies that expanded customer base by 25%.\n",
"Sales Representative, Market Leaders Inc. (2010 2015)\n",
"Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.\n",
"Skills:\n",
"\n",
"Sales Strategy and Planning\n",
"Team Leadership and Development\n",
"CRM Software: Salesforce, Zoho\n",
"Negotiation and Relationship Building\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "904df61ba484a8e5",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:54.243987Z",
"start_time": "2024-09-20T14:02:52.498195Z"
}
},
"outputs": [],
"source": [
"import cognee\n",
"from os import listdir, path\n",
"\n",
"data_path = path.abspath(\".data\")\n",
"\n",
"results = await cognee.add([job_1, job_2,job_3,job_4,job_5,job_position], \"example\")\n",
"\n",
"for result in results:\n",
" print(result)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "6f9b564de121713d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:55.564445Z",
"start_time": "2024-09-20T14:02:55.562784Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"id": "8911f8bd4f8c440a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:56.714408Z",
"start_time": "2024-09-20T14:02:56.711812Z"
}
},
"outputs": [],
"source": [
"# from enum import Enum, auto\n",
"# from typing import Optional, List, Union, Dict, Any\n",
"# from pydantic import BaseModel, Field\n",
"# \n",
"# class Node(BaseModel):\n",
"# \"\"\"Node in a knowledge graph.\"\"\"\n",
"# id: str\n",
"# name: str\n",
"# type: str\n",
"# description: str\n",
"# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n",
"# \n",
"# class Edge(BaseModel):\n",
"# \"\"\"Edge in a knowledge graph.\"\"\"\n",
"# source_node_id: str\n",
"# target_node_id: str\n",
"# relationship_name: str\n",
"# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n",
"# \n",
"# class KnowledgeGraph(BaseModel):\n",
"# \"\"\"Knowledge graph.\"\"\"\n",
"# nodes: List[Node] = Field(..., default_factory=list)\n",
"# edges: List[Edge] = Field(..., default_factory=list)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7c431fdef4921ae0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:57.925667Z",
"start_time": "2024-09-20T14:02:57.922353Z"
}
},
"outputs": [],
"source": [
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
" data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
"\n",
" try:\n",
"\n",
" root_node_id = None\n",
"\n",
" tasks = [\n",
" Task(classify_documents),\n",
" Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
" Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),\n",
" Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type\n",
" Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = \"entities\", task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes\n",
" Task(chunk_update_check, collection_name = \"chunks\"), # Find all affected chunks, so we don't process unchanged chunks\n",
" Task(\n",
" save_chunks_to_store,\n",
" collection_name = \"chunks\",\n",
" ), \n",
" Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
" ]\n",
"\n",
" pipeline = run_tasks(tasks, data_documents)\n",
"\n",
" async for result in pipeline:\n",
" print(result)\n",
" except Exception as error:\n",
" raise error"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0a91b99c6215e09",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-20T14:02:58.905774Z",
"start_time": "2024-09-20T14:02:58.625915Z"
}
},
"outputs": [],
"source": [
"user = await get_default_user()\n",
"datasets = await get_datasets_by_name([\"example\"], user.id)\n",
"await run_cognify_pipeline(datasets[0], user)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "080389e5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from cognee.shared.utils import render_graph\n",
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
"import graphistry\n",
"\n",
"# # Setting an environment variable\n",
"# os.environ[\"GRAPHISTRY_USERNAME\"] = placeholder\n",
"# os.environ[\"GRAPHISTRY_PASSWORD\"] = placeholder\n",
"\n",
"\n",
"graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n",
"\n",
"graph_engine = await get_graph_engine()\n",
"\n",
"graph_url = await render_graph(graph_engine.graph)\n",
"print(graph_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5e7dfc8",
"metadata": {},
"outputs": [],
"source": [
"async def search(\n",
" vector_engine,\n",
" collection_name: str,\n",
" query_text: str = None,\n",
"):\n",
" query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]\n",
"\n",
" connection = await vector_engine.get_connection()\n",
" collection = await connection.open_table(collection_name)\n",
"\n",
" results = await collection.vector_search(query_vector).limit(10).to_pandas()\n",
"\n",
" result_values = list(results.to_dict(\"index\").values())\n",
"\n",
" return [dict(\n",
" id = str(result[\"id\"]),\n",
" payload = result[\"payload\"],\n",
" score = result[\"_distance\"],\n",
" ) for result in result_values]\n",
"\n",
"\n",
"from cognee.infrastructure.databases.vector import get_vector_engine\n",
"\n",
"vector_engine = get_vector_engine()\n",
"results = await search(vector_engine, \"entities\", \"sarah.nguyen@example.com\")\n",
"for result in results:\n",
" print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

poetry.lock (generated): 3167 changes

File diff suppressed because it is too large.

View file

@@ -38,12 +38,10 @@ greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["postgres"], version = "^0.5.2"}
duckdb = {version = "^0.10.0", extras = ["dlt"]}
dlt = {extras = ["sqlalchemy"], version = "^1.0.0"}
overrides = "^7.7.0"
aiofiles = "^23.2.1"
qdrant-client = "^1.9.0"
duckdb-engine = "0.13.0"
graphistry = "^0.33.5"
tenacity = "^8.2.3"
weaviate-client = "4.6.7"
@@ -75,14 +73,12 @@ asyncpg = "^0.29.0"
[tool.poetry.extras]
duckdb = ["duckdb"]
filesystem = ["s3fs", "botocore"]
motherduck = ["duckdb"]
cli = ["pipdeptree", "cron-descriptor"]
weaviate = ["weaviate-client"]
qdrant = ["qdrant-client"]
neo4j = ["neo4j", "py2neo"]
notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
neo4j = ["neo4j"]
notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
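
The dependency changes above carry the headline of this commit: dlt with the postgres extra, duckdb, and duckdb-engine are dropped in favor of dlt[sqlalchemy] >= 1.0, so ingestion can load into any database SQLAlchemy can reach. A minimal sketch of that destination in use, with a placeholder connection string and table name rather than cognee's actual wiring:

# Illustrative only; pipeline name, table name, and the SQLite URL are placeholders.
import dlt

pipeline = dlt.pipeline(
    pipeline_name = "cognee_example",
    destination = dlt.destinations.sqlalchemy("sqlite:///cognee.db"),
    dataset_name = "example",
)

load_info = pipeline.run(
    [{"id": 1, "name": "document.pdf"}],   # any iterable of dicts works as a resource
    table_name = "file_metadata",
)
print(load_info)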