feat: add sqlalchemy as dlt destination (#137)

* feat: add sqlalchemy as dlt destination * Fix the demo, update Readme * fix: add 1.5 notebook --------- Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
2024-09-21 15:58:28 +02:00 · 2024-09-21 15:58:28 +02:00 · a9433e9283
commit a9433e9283
parent a09f7991e2
56 changed files with 2435 additions and 2554 deletions
--- a/.github/workflows/test_neo4j.yml
+++ b/.github/workflows/test_neo4j.yml
@ -18,13 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_neo4j_integration_test:
    name: test
    needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -66,18 +47,6 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run default Neo4j
        env:
          ENV: 'dev'
@ -85,14 +54,4 @@ jobs:
          GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
          GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
          GRAPH_DATABASE_USERNAME: "neo4j"
          DB_USER: cognee
          DB_PASSWORD: cognee
          DB_NAME: cognee_db
          DB_HOST: localhost
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_neo4j.py
--- a/.github/workflows/test_python_3_10.yml
+++ b/.github/workflows/test_python_3_10.yml
@ -18,15 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_common:
    name: test
    needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run tests
        run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          DB_HOST: localhost
          DB_USERNAME: cognee
          DB_PASSWORD: cognee
          DB_DATABASE: cognee_db
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_library.py
      - name: Clean up disk space
--- a/.github/workflows/test_python_3_11.yml
+++ b/.github/workflows/test_python_3_11.yml
@ -18,15 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_common:
    name: test
    needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run tests
        run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          DB_HOST: localhost
          DB_USERNAME: cognee
          DB_PASSWORD: cognee
          DB_DATABASE: cognee_db
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_library.py
      - name: Clean up disk space
--- a/.github/workflows/test_python_3_9.yml
+++ b/.github/workflows/test_python_3_9.yml
@ -18,15 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_common:
    name: test
    needs: get_docs_changes
@ -38,19 +29,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -71,23 +49,6 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        env:
          PGUSER: cognee
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run tests
        run: poetry run pytest tests/
@ -95,16 +56,6 @@ jobs:
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          DB_HOST: localhost
          DB_USERNAME: cognee
          DB_PASSWORD: cognee
          DB_DATABASE: cognee_db
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_library.py
      - name: Clean up disk space
--- a/.github/workflows/test_qdrant.yml
+++ b/.github/workflows/test_qdrant.yml
@ -18,13 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_qdrant_integration_test:
    name: test
    needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -66,32 +47,10 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run default Qdrant
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
          VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
          DB_USER: cognee
          DB_PASSWORD: cognee
          DB_NAME: cognee_db
          DB_HOST: localhost
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_qdrant.py
--- a/.github/workflows/test_weaviate.yml
+++ b/.github/workflows/test_weaviate.yml
@ -18,13 +18,6 @@ jobs:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  setup_docker:
    name: Set up Docker Buildx
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
  run_weaviate_integration_test:
    name: test
    needs: get_docs_changes
@ -35,18 +28,6 @@ jobs:
      run:
        shell: bash
    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_USER: cognee
          POSTGRES_PASSWORD: cognee
          POSTGRES_DB: cognee_db
        volumes:
          - postgres_data:/var/lib/postgresql/data
        ports:
          - 5432:5432
    steps:
      - name: Check out
        uses: actions/checkout@master
@ -66,32 +47,10 @@ jobs:
      - name: Install dependencies
        run: poetry install --no-interaction
      - name: Create .cognee_system directory and print path
        run: |
          mkdir .cognee_system
          echo $(pwd)/.cognee_system
      - name: Wait for PostgreSQL to be ready
        run: |
          echo "Waiting for PostgreSQL to be ready..."
          until pg_isready -h localhost -p 5432 -U cognee; do
            sleep 1
          done
      - name: Run default Weaviate
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }}
          VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }}
          DB_USER: cognee
          DB_PASSWORD: cognee
          DB_NAME: cognee_db
          DB_HOST: localhost
          DB_PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__HOST: localhost
          DESTINATION__POSTGRES__CREDENTIALS__PORT: 5432
          DESTINATION__POSTGRES__CREDENTIALS__USERNAME: cognee
          DESTINATION__POSTGRES__CREDENTIALS__PASSWORD: cognee
          DESTINATION__POSTGRES__CREDENTIALS__DATABASE: cognee_db
        run: poetry run python ./cognee/tests/test_weaviate.py
--- a/README.md
+++ b/README.md
@ -18,24 +18,12 @@ We build for developers who need a reliable, production-ready data layer for AI
  </a>
 </p>
 cognee implements scalable, modular data pipelines that allow for creating the LLM-enriched data layer using graph and vector stores.
 <p>
  <i> cognee aims to be dbt for LLMOps</i>
 </p>
 ## What is cognee? 
 cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions, while also reducing hallucinations, developer effort, and cost.
 Try it in a Google Colab <a href="https://colab.research.google.com/drive/1jayZ5JRwDaUGFvCw9UZySBG-iB9gpYfu?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>
 If you have questions, join our  <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community
@ -58,7 +46,7 @@ poetry add cognee
 ```
-## 💻 Usage
+## 💻 Basic Usage
 ### Setup
@ -75,24 +63,6 @@ cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
 ```
 You can use different LLM providers, for more info check out our <a href="https://topoteretes.github.io/cognee">documentation</a>
 In the next step make sure to launch a Postgres instance. Here is an example from our docker-compose:
 ```
  postgres:
    image: postgres:latest
    container_name: postgres
    environment:
      POSTGRES_USER: cognee
      POSTGRES_PASSWORD: cognee
      POSTGRES_DB: cognee_db
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - 5432:5432
    networks:
      - cognee-network
 ```
 If you are using Networkx, create an account on Graphistry to visualize results:
 ```
@ -106,12 +76,7 @@ docker-compose up cognee
 ```
 Then navigate to localhost:3000/wizard
-### Run the default example
+### Simple example
 Make sure to launch the Postgres instance first. Navigate to the cognee folder and run:
 ```
 docker compose up postgres
 ```
 Run the default cognee pipeline:
@ -123,7 +88,7 @@ text = """Natural language processing (NLP) is an interdisciplinary
 await cognee.add([text], "example_dataset") # Add a new piece of information
-await cognee.cognify() # Use LLMs and cognee to create knowledge
+await cognee.cognify() # Use LLMs and cognee to create a semantic graph
 search_results = await cognee.search("SIMILARITY", {'query': 'Tell me about NLP'}) # Query cognee for the knowledge
@ -132,19 +97,20 @@ print(search_results)
 ```
-### Create your pipelines
+### Create your own memory store
 cognee framework consists of tasks that can be grouped into pipelines.
 Each task can be an independent part of business logic, that can be tied to other tasks to form a pipeline.
 These tasks persist data into your memory store enabling you to search for relevant context of past conversations, documents, or any other data you have stored.
 ### Example: Classify your documents
 cognee framework consists of tasks that can be grouped into pipelines. Each task can be an independent part of business logic, that can be tied to other tasks to form a pipeline.
 Here is an example of how it looks for a default cognify pipeline:
 1. To prepare the data for the pipeline run, first we need to add it to our metastore and normalize it:
-Start with: 
+Start with:
 ```
 docker compose up postgres
 ```
 And then run: 
 ```
 text = """Natural language processing (NLP) is an interdisciplinary
       subfield of computer science and information retrieval"""
@ -158,90 +124,62 @@ Here we show an example of creating a naive LLM classifier that takes a Pydantic
 We provided just a snippet for reference, but feel free to check out the implementation in our repo. 
 ```
-async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]):
+async def chunk_naive_llm_classifier(
-    if len(data_chunks) == 0:
+    data_chunks: list[DocumentChunk],
-        return data_chunks
+    classification_model: Type[BaseModel]
-
+):
    # Extract classifications asynchronously
    chunk_classifications = await asyncio.gather(
-        *[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
+        *(extract_categories(chunk.text, classification_model) for chunk in data_chunks)
    )
-    classification_data_points = []
+    # Collect classification data points using a set to avoid duplicates
-
+    classification_data_points = {
-    for chunk_index, chunk in enumerate(data_chunks):
+        uuid5(NAMESPACE_OID, cls.label.type)
-        chunk_classification = chunk_classifications[chunk_index]
+        for cls in chunk_classifications
-        classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
+    } | {
-        classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
+        uuid5(NAMESPACE_OID, subclass.value)
-
+        for cls in chunk_classifications
-        for classification_subclass in chunk_classification.label.subclass:
+        for subclass in cls.label.subclass
-            classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
+    }
    vector_engine = get_vector_engine()
    collection_name = "classification"
    # Define the payload schema
    class Keyword(BaseModel):
        uuid: str
        text: str
        chunk_id: str
        document_id: str
-    collection_name = "classification"
+    # Ensure the collection exists and retrieve existing data points
-
+    if not await vector_engine.has_collection(collection_name):
-    if await vector_engine.has_collection(collection_name):
+        await vector_engine.create_collection(collection_name, payload_schema=Keyword)
-        existing_data_points = await vector_engine.retrieve(
+        existing_points_map = {}
            collection_name,
            list(set(classification_data_points)),
        ) if len(classification_data_points) > 0 else []
        existing_points_map = {point.id: True for point in existing_data_points}
    else:
        existing_points_map = {}
-        await vector_engine.create_collection(collection_name, payload_schema=Keyword)
+    return data_chunks
    data_points = []
    nodes = []
    edges = []
    for (chunk_index, data_chunk) in enumerate(data_chunks):
        chunk_classification = chunk_classifications[chunk_index]
        classification_type_label = chunk_classification.label.type
        classification_type_id = uuid5(NAMESPACE_OID, classification_type_label)
 ...
 ```
-To see existing tasks, have a look at the cognee.tasks
+We have a large number of tasks that can be used in your pipelines, and you can also create your own tasks to fit your business logic.
 3. Once we have our tasks, it is time to group them into a pipeline.
-This snippet shows how a group of tasks can be added to a pipeline, and how they can pass the information forward from one to another. 
+This simplified snippet demonstrates how tasks can be added to a pipeline, and how they can pass the information forward from one to another. 
 ```
-            tasks = [
+            
                Task(document_to_ontology, root_node_id = root_node_id),
                Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
                Task(chunk_to_graph_decomposition, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data
                Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
                Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
                Task(
                    save_chunks_to_store,
                    collection_name = "chunks",
                ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
                run_tasks_parallel([
                    Task(
                        chunk_extract_summary,
                        summarization_model = cognee_config.summarization_model,
                        collection_name = "chunk_summaries",
                    ), # Summarize the document chunks
                    Task(
                        chunk_naive_llm_classifier,
                        classification_model = cognee_config.classification_model,
                    ),
                ]),
                Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
            ]
-            pipeline = run_tasks(tasks, documents)
+Task(
    chunk_naive_llm_classifier,
    classification_model = cognee_config.classification_model,
 )
 pipeline = run_tasks(tasks, documents)
 ```
@ -277,3 +215,23 @@ Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/ma
 [![Star History Chart](https://api.star-history.com/svg?repos=topoteretes/cognee&type=Date)](https://star-history.com/#topoteretes/cognee&Date)
 ## Get Started
 ### Install Server
 Please see the [cognee Quick Start Guide](https://topoteretes.github.io/cognee/quickstart/) for important configuration information.
 ```bash
 docker compose up
 ```
 ### Install SDK
 Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions.
 ```bash
 pip install cognee
 ```
--- a/cognee/api/client.py
+++ b/cognee/api/client.py
@ -14,8 +14,6 @@ from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.infrastructure.databases.relational import create_db_and_tables
 # Set up logging
 logging.basicConfig(
    level=logging.INFO,  # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
@ -34,8 +32,12 @@ from contextlib import asynccontextmanager
@asynccontextmanager
 async def lifespan(app: FastAPI):
    # Lifespan hook handed to FastAPI(lifespan = lifespan): everything before
    # `yield` runs before the application starts serving requests.
    # The imports are done inside the function body rather than at module level.
    from cognee.infrastructure.databases.relational import create_db_and_tables
    from cognee.modules.users.methods import get_default_user
    # Not needed if you setup a migration system like Alembic
    await create_db_and_tables()
    # Awaited right after table creation; presumably ensures the default user
    # exists before requests arrive — confirm against get_default_user.
    await get_default_user()
    # Nothing follows the yield, so no teardown work is performed here.
    yield
 app = FastAPI(debug = os.getenv("ENV") != "prod", lifespan = lifespan)
@ -394,10 +396,10 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    try:
        logger.info("Starting server at %s:%s", host, port)
-        import asyncio
+        # import asyncio
-        from cognee.modules.data.deletion import prune_system, prune_data
+        # from cognee.modules.data.deletion import prune_system, prune_data
-        asyncio.run(prune_data())
+        # asyncio.run(prune_data())
-        asyncio.run(prune_system(metadata = True))
+        # asyncio.run(prune_system(metadata = True))
        uvicorn.run(app, host = host, port = port)
    except Exception as e:
--- a/cognee/api/v1/add/add.py
+++ b/cognee/api/v1/add/add.py
@ -2,7 +2,6 @@ from typing import List, Union, BinaryIO
 from os import path
 import asyncio
 import dlt
 import duckdb
 import cognee.modules.ingestion as ingestion
 from cognee.infrastructure.files.storage import LocalStorage
@ -81,22 +80,16 @@ async def add_files(file_paths: List[str], dataset_name: str, user: User = None)
    relational_config = get_relational_config()
-    if relational_config.db_provider == "duckdb":
+    destination = dlt.destinations.sqlalchemy(
-        db = duckdb.connect(relational_config.db_file_path)
+        credentials = {
-
+            "host": relational_config.db_host,
-        destination = dlt.destinations.duckdb(
+            "port": relational_config.db_port,
-          credentials = db,
+            "username": relational_config.db_username,
-        )
+            "password": relational_config.db_password,
-    else:
+            "database": relational_config.db_name,
-        destination = dlt.destinations.postgres(
+            "drivername": relational_config.db_provider,
-            credentials = {
+        },
-                "host": relational_config.db_host,
+    )
                "port": relational_config.db_port,
                "user": relational_config.db_user,
                "password": relational_config.db_password,
                "database": relational_config.db_name,
            },
        )
    pipeline = dlt.pipeline(
        pipeline_name = "file_load_from_filesystem",
--- a/cognee/api/v1/cognify/cognify_v2.py
+++ b/cognee/api/v1/cognify/cognify_v2.py
@ -46,72 +46,6 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None):
    if type(datasets[0]) == str:
        datasets = await get_datasets_by_name(datasets, user.id)
    async def run_cognify_pipeline(dataset: Dataset):
        data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)
        document_ids_str = [str(document.id) for document in data_documents]
        dataset_id = dataset.id
        dataset_name = generate_dataset_name(dataset.name)
        async with update_status_lock:
            task_status = await get_pipeline_status([dataset_id])
            if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED":
                logger.info("Dataset %s is already being processed.", dataset_name)
                return
            await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", {
                "dataset_name": dataset_name,
                "files": document_ids_str,
            })
        try:
            cognee_config = get_cognify_config()
            root_node_id = None
            tasks = [
                Task(classify_documents),
                Task(check_permissions_on_documents, user = user, permissions = ["write"]),
                Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),
                Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
                Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities", task_config = { "batch_size": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
                Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
                Task(
                    save_chunks_to_store,
                    collection_name = "chunks",
                ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
                run_tasks_parallel([
                    Task(
                        chunk_extract_summary,
                        summarization_model = cognee_config.summarization_model,
                        collection_name = "chunk_summaries",
                    ), # Summarize the document chunks
                    Task(
                        chunk_naive_llm_classifier,
                        classification_model = cognee_config.classification_model,
                    ),
                ]),
                Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
            ]
            pipeline = run_tasks(tasks, data_documents)
            async for result in pipeline:
                print(result)
            await log_pipeline_status(dataset_id, "DATASET_PROCESSING_FINISHED", {
                "dataset_name": dataset_name,
                "files": document_ids_str,
            })
        except Exception as error:
            await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERROR", {
                "dataset_name": dataset_name,
                "files": document_ids_str,
            })
            raise error
    existing_datasets_map = {
        generate_dataset_name(dataset.name): True for dataset in existing_datasets
    }
@ -122,10 +56,76 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None):
        dataset_name = generate_dataset_name(dataset.name)
        if dataset_name in existing_datasets_map:
-            awaitables.append(run_cognify_pipeline(dataset))
+            awaitables.append(run_cognify_pipeline(dataset, user))
    return await asyncio.gather(*awaitables)
 async def run_cognify_pipeline(dataset: Dataset, user: User):
    """Run the cognify task pipeline over all documents of one dataset.

    Args:
        dataset: Dataset whose documents are loaded with ``get_dataset_data``.
        user: Passed to the ``check_permissions_on_documents`` task, which is
            configured with the ``"write"`` permission.

    Returns:
        None. Returns early, without running anything, when the dataset's
        pipeline status is already ``"DATASET_PROCESSING_STARTED"``.

    Raises:
        Exception: Any error raised while running the tasks is re-raised after
            the ``"DATASET_PROCESSING_ERROR"`` status has been logged.
    """
    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)
    # Stringified document ids are attached to every status log entry below.
    document_ids_str = [str(document.id) for document in data_documents]
    dataset_id = dataset.id
    dataset_name = generate_dataset_name(dataset.name)
    # The status check and the "started" log entry happen under the same lock,
    # so the check-then-mark sequence is not interleaved with another holder
    # of update_status_lock.
    async with update_status_lock:
        task_status = await get_pipeline_status([dataset_id])
        if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED":
            logger.info("Dataset %s is already being processed.", dataset_name)
            return
        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
    try:
        cognee_config = get_cognify_config()
        # No root node is supplied; the tasks below receive None for it.
        root_node_id = None
        # Task list handed to run_tasks together with the dataset's documents.
        tasks = [
            Task(classify_documents),
            Task(check_permissions_on_documents, user = user, permissions = ["write"]),
            Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),
            Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
            Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities", task_config = { "batch_size": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
            Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
            Task(
                save_chunks_to_store,
                collection_name = "chunks",
            ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
            run_tasks_parallel([
                Task(
                    chunk_extract_summary,
                    summarization_model = cognee_config.summarization_model,
                    collection_name = "chunk_summaries",
                ), # Summarize the document chunks
                Task(
                    chunk_naive_llm_classifier,
                    classification_model = cognee_config.classification_model,
                ),
            ]),
            Task(chunk_remove_disconnected), # Remove the obsolete document chunks.
        ]
        pipeline = run_tasks(tasks, data_documents)
        # run_tasks is consumed as an async iterator; each yielded result is
        # only printed, not collected or returned.
        async for result in pipeline:
            print(result)
        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_FINISHED", {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
    except Exception as error:
        # Record the failure for this dataset, then propagate the original error.
        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERROR", {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
        raise error
 def generate_dataset_name(dataset_name: str) -> str:
    """Return ``dataset_name`` with every "." and " " replaced by "_"."""
    # A single translation pass maps both characters to an underscore.
    return dataset_name.translate(str.maketrans({".": "_", " ": "_"}))
--- a/cognee/infrastructure/databases/relational/DatabaseEngine.py
+++ b/cognee/infrastructure/databases/relational/DatabaseEngine.py
@ -1,23 +0,0 @@
 from typing import Protocol
 class DatabaseEngine(Protocol):
    """Protocol listing the operations a relational database engine exposes.

    Every method body here is an empty stub that returns ``None``.
    """
    async def ensure_tables(self):
        """Stub (async); takes no arguments besides ``self``."""
    def database_exists(self, db_name: str) -> bool:
        """Stub; annotated as returning ``bool`` for database ``db_name``."""
    def create_database(self, db_name: str):
        """Stub; receives the database name ``db_name``."""
    def drop_database(self, db_name: str):
        """Stub; receives the database name ``db_name``."""
    async def table_exists(self, table_name: str) -> bool:
        """Stub (async); annotated as returning ``bool`` for table ``table_name``."""
    async def create_tables(self):
        """Stub (async); takes no arguments besides ``self``."""
    async def create(self, data):
        """Stub (async); receives ``data``."""
--- a/cognee/infrastructure/databases/relational/FakeAsyncSession.py
+++ b/cognee/infrastructure/databases/relational/FakeAsyncSession.py
@ -1,29 +0,0 @@
 import inspect
 from typing import Any
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import Session
 class FakeAsyncSession:
    """Wrap a synchronous ``Session`` so callers can use it with ``await``."""
    def __init__(self, session: Session):
        self.session = session
    def run_sync(self, *args, **kwargs):
        # ``execute`` is not defined on this class, so it is resolved through ``__getattr__``.
        return self.execute(*args, **kwargs)
    def __getattr__(self, name: str) -> Any:
        """
        Look ``name`` up on the wrapped session.

        When the attribute of the same name on ``AsyncSession`` is a coroutine
        function, return an async wrapper around the synchronous attribute so it
        can be awaited; otherwise return the synchronous attribute unchanged.
        """
        async_counterpart = getattr(AsyncSession, name, None)
        sync_attribute = getattr(self.session, name)
        if inspect.iscoroutinefunction(async_counterpart):
            async def async_wrapper(*args, **kwargs):
                # Runs the synchronous call inline; nothing is actually awaited.
                return sync_attribute(*args, **kwargs)
            return async_wrapper
        return sync_attribute
--- a/cognee/infrastructure/databases/relational/init.py
+++ b/cognee/infrastructure/databases/relational/init.py
@ -1,7 +1,7 @@
 from .ModelBase import Base
 from .DatabaseEngine import DatabaseEngine
 from .sqlite.SqliteEngine import SqliteEngine
 from .duckdb.DuckDBAdapter import DuckDBAdapter
 from .config import get_relational_config
 from .create_db_and_tables import create_db_and_tables
 from .get_relational_engine import get_relational_engine
 # Global data types
 from .data_types.UUID import UUID
--- a/cognee/infrastructure/databases/relational/config.py
+++ b/cognee/infrastructure/databases/relational/config.py
@ -1,4 +1,5 @@
 import os
 from typing import Union
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from cognee.root_dir import get_absolute_path
@ -6,13 +7,11 @@ from cognee.root_dir import get_absolute_path
 class RelationalConfig(BaseSettings):
    db_path: str =  os.path.join(get_absolute_path(".cognee_system"), "databases")
    db_name: str =  "cognee_db"
-    db_host: str =  "localhost"
+    db_host: Union[str, None] = None # "localhost"
-    db_port: str =  "5432"
+    db_port: Union[str, None] = None # "5432"
-    db_user: str = "cognee"
+    db_username: Union[str, None] = None # "cognee"
-    db_password: str =  "cognee"
+    db_password: Union[str, None] = None # "cognee"
-    db_provider: str = "postgresql+asyncpg"
+    db_provider: str = "sqlite"
    db_file_path: str = os.path.join(db_path, db_name)
    model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -22,7 +21,7 @@ class RelationalConfig(BaseSettings):
            "db_name": self.db_name,
            "db_host": self.db_host,
            "db_port": self.db_port,
-            "db_user": self.db_user,
+            "db_username": self.db_username,
            "db_password": self.db_password,
            "db_provider": self.db_provider,
        }
--- a/cognee/infrastructure/databases/relational/create_db_and_tables.py
+++ b/cognee/infrastructure/databases/relational/create_db_and_tables.py
@ -1,9 +1,14 @@
 from cognee.infrastructure.files.storage import LocalStorage
 from .ModelBase import Base
-from .get_relational_engine import get_relational_engine
+from .get_relational_engine import get_relational_engine, get_relational_config
 async def create_db_and_tables():
    """Create all tables registered on ``Base.metadata`` with the relational engine.

    For the "sqlite" dialect the configured ``db_path`` directory is created first.
    Nothing is created when no tables are registered on the metadata.
    """
    config = get_relational_config()
    engine = get_relational_engine().engine
    if engine.dialect.name == "sqlite":
        LocalStorage.ensure_directory_exists(config.db_path)
    async with engine.begin() as connection:
        if Base.metadata.tables:
            await connection.run_sync(Base.metadata.create_all)
--- a/cognee/infrastructure/databases/relational/create_relational_engine.py
+++ b/cognee/infrastructure/databases/relational/create_relational_engine.py
@ -3,18 +3,16 @@ from .sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
 def create_relational_engine(
    db_path: str,
    db_name: str,
    db_provider: str,
    db_host: str,
    db_port: str,
-    db_user: str,
+    db_username: str,
    db_password: str,
    db_provider: str,
 ):
-    return SQLAlchemyAdapter(
+    if db_provider == "sqlite":
-        db_name = db_name,
+        connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
-        db_path = db_path,
+
-        db_type = db_provider,
+    if db_provider == "postgres":
-        db_host = db_host,
+        connection_string = f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
-        db_port = db_port,
+
-        db_user = db_user,
+    return SQLAlchemyAdapter(connection_string)
        db_password = db_password
    )
--- a/cognee/infrastructure/databases/relational/data_types/UUID.py
+++ b/cognee/infrastructure/databases/relational/data_types/UUID.py
@ -0,0 +1,43 @@
 import uuid
 from sqlalchemy.types import TypeDecorator, BINARY
 from sqlalchemy.dialects.postgresql import UUID as psqlUUID
 class UUID(TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses
    BINARY(16), to store UUID.
    """
    impl = BINARY
    # The type takes no constructor arguments, so it can safely take part in
    # SQLAlchemy's statement cache (avoids the "will not produce a cache key" warning).
    cache_ok = True
    def load_dialect_impl(self, dialect):
        """Pick the column implementation: native UUID on PostgreSQL, BINARY(16) elsewhere."""
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(psqlUUID())
        return dialect.type_descriptor(BINARY(16))
    def process_bind_param(self, value, dialect):
        """Convert a bound value to what the database stores.

        ``bytes``, ``int`` and ``str`` inputs are coerced to ``uuid.UUID`` first.
        Returns ``None`` unchanged, a string on PostgreSQL and 16 raw bytes elsewhere.
        """
        if value is None:
            return value
        if not isinstance(value, uuid.UUID):
            if isinstance(value, bytes):
                value = uuid.UUID(bytes = value)
            elif isinstance(value, int):
                value = uuid.UUID(int = value)
            elif isinstance(value, str):
                value = uuid.UUID(value)
        if dialect.name == 'postgresql':
            return str(value)
        return value.bytes
    def process_result_value(self, value, dialect):
        """Convert a value loaded from the database to ``uuid.UUID`` (``None`` stays ``None``)."""
        if value is None:
            return value
        # The underlying type or driver may already hand back a uuid.UUID
        # (e.g. PostgreSQL UUID with as_uuid enabled); uuid.UUID() cannot re-parse one.
        if isinstance(value, uuid.UUID):
            return value
        if dialect.name == 'postgresql':
            return uuid.UUID(value)
        return uuid.UUID(bytes = value)
--- a/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py
+++ b/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py
@ -1,169 +0,0 @@
 import duckdb
 import os
 class DuckDBAdapter():
    """Helper around a single DuckDB database file.

    Every operation opens its own connection through ``self.get_connection``.
    Schema, table and column names are interpolated into the SQL text as-is and
    must therefore come from trusted code; values are escaped or bound as parameters.
    """
    # DDL shared by every method that works with the "cognify" table.
    _CREATE_COGNIFY_TABLE_SQL = """
        CREATE TABLE IF NOT EXISTS cognify (
            document_id STRING,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT NULL,
            processed BOOLEAN DEFAULT FALSE,
            document_id_target STRING NULL
        );
    """
    def __init__(self, db_path: str, db_name: str):
        """Remember the absolute database location and a factory that connects to it."""
        self.db_location = os.path.abspath(os.path.join(db_path, db_name))
        self.get_connection = lambda: duckdb.connect(self.db_location)
    @staticmethod
    def _quote(value) -> str:
        """Return ``str(value)`` as a single-quoted SQL literal with embedded quotes doubled."""
        return "'" + str(value).replace("'", "''") + "'"
    @classmethod
    def _to_literal(cls, value) -> str:
        """Render a value for a VALUES list: NULL for ``None``, quoted for ``str``, ``str(value)`` otherwise."""
        if value is None:
            return "NULL"
        if isinstance(value, str):
            return cls._quote(value)
        return str(value)
    def _ensure_cognify_table(self):
        """Create the "cognify" table when it does not exist yet."""
        with self.get_connection() as connection:
            connection.execute(self._CREATE_COGNIFY_TABLE_SQL)
    def get_datasets(self):
        """Return the distinct schema names, excluding "cognee" and names ending in "staging"."""
        with self.get_connection() as connection:
            tables = connection.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict("list")
        return [
            schema_name for schema_name in tables["schema_name"]
            if not schema_name.endswith("staging") and schema_name != "cognee"
        ]
    def get_files_metadata(self, dataset_name: str):
        """Return the rows of ``<dataset_name>.file_metadata`` as a list of dicts."""
        with self.get_connection() as connection:
            return connection.sql(f"SELECT id, name, file_path, extension, mime_type FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
    def create_table(self, schema_name: str, table_name: str, table_config: list[dict]):
        """Create the schema and table if missing; each ``table_config`` item provides "name" and "type"."""
        fields_query_parts = [
            f"{table_config_item['name']} {table_config_item['type']}"
            for table_config_item in table_config
        ]
        with self.get_connection() as connection:
            query = f"CREATE SCHEMA IF NOT EXISTS {schema_name};"
            connection.execute(query)
        with self.get_connection() as connection:
            query = f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({', '.join(fields_query_parts)});"
            connection.execute(query)
    def delete_table(self, table_name: str):
        """Drop ``table_name`` if it exists."""
        with self.get_connection() as connection:
            query = f"DROP TABLE IF EXISTS {table_name};"
            connection.execute(query)
    def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
        """Insert ``data`` rows into ``schema_name.table_name``.

        Column names are taken from the first row. An empty ``data`` list is a no-op.
        """
        if not data:
            # Nothing to insert; data[0] below would raise IndexError.
            return
        def get_values(data_entry):
            # Every item must be a str for join(); non-string values are rendered explicitly.
            return ", ".join(self._to_literal(value) for value in data_entry)
        columns = ", ".join(data[0].keys())
        values = ", ".join(f"({get_values(data_entry.values())})" for data_entry in data)
        with self.get_connection() as connection:
            query = f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES {values};"
            connection.execute(query)
    def get_data(self, table_name: str, filters: dict = None):
        """Select rows from ``table_name`` and return ``{data_id: status}``.

        ``filters`` maps a column name to a value (equality) or a list of values (IN).
        All filter values are compared as quoted strings.
        """
        def get_values(values: list):
            return ", ".join(self._quote(value) for value in values)
        def get_filters(filters: dict):
            return " AND ".join(
                f"{key} IN ({get_values(value)})" if isinstance(value, list)
                else f"{key} = {self._quote(value)}"
                for (key, value) in filters.items()
            )
        query = f"SELECT * FROM {table_name}" + (";" if filters is None else f" WHERE {get_filters(filters)};")
        with self.get_connection() as connection:
            results = connection.sql(query).to_df().to_dict("records")
        return {
            result["data_id"]: result["status"] for result in results
        }
    def execute_query(self, query):
        """Run ``query`` verbatim and return the result rows as a list of dicts."""
        with self.get_connection() as connection:
            return connection.sql(query).to_df().to_dict("records")
    def load_cognify_data(self, data):
        """Insert the "document_id" of every record in ``data`` into the "cognify" table."""
        self._ensure_cognify_table()
        insert_query = """
            INSERT INTO cognify (document_id)
            VALUES (?);
        """
        # One connection serves all inserts instead of reconnecting per record.
        with self.get_connection() as connection:
            for record in data:
                connection.execute(insert_query, [
                    record.get("document_id"),
                ])
    def fetch_cognify_data(self, excluded_document_id: str):
        """Return the unprocessed "cognify" rows other than ``excluded_document_id`` and mark them processed."""
        self._ensure_cognify_table()
        select_data_sql = (
            "SELECT document_id, created_at, updated_at, processed FROM cognify "
            f"WHERE document_id != {self._quote(excluded_document_id)} AND processed = FALSE;"
        )
        with self.get_connection() as connection:
            records = connection.sql(select_data_sql).to_df().to_dict("records")
        if records:
            document_ids = [record["document_id"] for record in records]
            # Bound parameters avoid relying on Python's tuple repr, which yields
            # "('id',)" for a single id and double-quoted text for ids containing "'".
            placeholders = ", ".join("?" for _ in document_ids)
            update_data_sql = f"UPDATE cognify SET processed = TRUE WHERE document_id IN ({placeholders});"
            with self.get_connection() as connection:
                connection.execute(update_data_sql, document_ids)
        return records
    def delete_cognify_data(self):
        """Delete all rows from the "cognify" table and then drop the table."""
        # Creating the table first keeps the DELETE/DROP below from failing when it is absent.
        self._ensure_cognify_table()
        with self.get_connection() as connection:
            delete_data_sql = "DELETE FROM cognify;"
            connection.sql(delete_data_sql)
            drop_data_sql = "DROP TABLE cognify;"
            connection.sql(drop_data_sql)
    def delete_database(self):
        """Remove the database file and its ".wal" companion file when they exist."""
        from cognee.infrastructure.files.storage import LocalStorage
        if LocalStorage.file_exists(self.db_location):
            LocalStorage.remove(self.db_location)
        if LocalStorage.file_exists(self.db_location + ".wal"):
            LocalStorage.remove(self.db_location + ".wal")
--- a/cognee/infrastructure/databases/relational/duckdb/init.py
+++ b/cognee/infrastructure/databases/relational/duckdb/init.py
--- a/cognee/infrastructure/databases/relational/relational_db_interface.py
+++ b/cognee/infrastructure/databases/relational/relational_db_interface.py
@ -1,26 +0,0 @@
 from abc import abstractmethod
 from typing import Protocol, TypeVar, Type, List
 RowDataType = TypeVar('RowDataType')
 class RelationalDBInterface(Protocol):
    """Abstract async interface of a relational database adapter.

    Every method is abstract and raises ``NotImplementedError`` when awaited.
    """
    @abstractmethod
    async def create_database(self, database_name: str, database_path: str):
        """Abstract; receives a database name and a database path."""
        raise NotImplementedError
    @abstractmethod
    async def create_table(self, table_name: str, table_config: object):
        """Abstract; receives a table name and a table configuration object."""
        raise NotImplementedError
    @abstractmethod
    async def add_row(self, table_name: str, row_data: Type[RowDataType]):
        """Abstract; receives a table name and one row's data."""
        raise NotImplementedError
    @abstractmethod
    async def add_rows(self, table_name: str, rows_data: List[Type[RowDataType]]):
        """Abstract; receives a table name and a list of rows' data."""
        raise NotImplementedError
    @abstractmethod
    async def get_row(self, table_name: str, row_id: str):
        """Abstract; receives a table name and a row id."""
        raise NotImplementedError
    @abstractmethod
    async def update_row(self, table_name: str, row_id: str, row_data: Type[RowDataType]):
        """Abstract; receives a table name, a row id and the row's data."""
        raise NotImplementedError
    @abstractmethod
    async def delete_row(self, table_name: str, row_id: str):
        """Abstract; receives a table name and a row id."""
        raise NotImplementedError
--- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py
+++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py
@ -1,39 +1,18 @@
 import os
 import asyncio
 from typing import AsyncGenerator
 from contextlib import asynccontextmanager
-from sqlalchemy import create_engine, text, select
+from sqlalchemy import text, select
-from sqlalchemy.orm import sessionmaker, joinedload
+from sqlalchemy.orm import joinedload
 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
-from cognee.infrastructure.files.storage import LocalStorage
+
 from cognee.infrastructure.databases.relational.FakeAsyncSession import FakeAsyncSession
 from ..ModelBase import Base
 def make_async_sessionmaker(sessionmaker):
    """Return an async context manager factory built on a synchronous ``sessionmaker``.

    Entering the context sleeps for 0.1 seconds, wraps a new session from
    ``sessionmaker()`` in ``FakeAsyncSession`` and yields the wrapper; the
    wrapper's ``close()`` is awaited on exit.
    """
    @asynccontextmanager
    async def async_session_maker():
        await asyncio.sleep(0.1)
        wrapped_session = FakeAsyncSession(sessionmaker())
        try:
            yield wrapped_session
        finally:
            # Runs on normal exit and when the ``async with`` body raises.
            await wrapped_session.close()
    return async_session_maker
 class SQLAlchemyAdapter():
-    def __init__(self, db_type: str, db_path: str, db_name: str, db_user: str, db_password: str, db_host: str, db_port: str):
+    def __init__(self, connection_string: str):
-        self.db_location = os.path.abspath(os.path.join(db_path, db_name))
+        self.engine = create_async_engine(connection_string)
-        self.db_name = db_name
+        self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
-        if db_type == "duckdb":
+        if self.engine.dialect.name == "sqlite":
-            LocalStorage.ensure_directory_exists(db_path)
+            self.db_path = connection_string.split("///")[1]
            self.engine = create_engine(f"duckdb:///{self.db_location}")
            self.sessionmaker = make_async_sessionmaker(sessionmaker(bind=self.engine))
        else:
            self.engine = create_async_engine(f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}")
            self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
    @asynccontextmanager
    async def get_async_session(self) -> AsyncGenerator[AsyncSession, None]:
@ -72,6 +51,7 @@ class SQLAlchemyAdapter():
            await connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;"))
            await connection.close()
    async def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
        columns = ", ".join(data[0].keys())
        values = ", ".join([f"({', '.join([f':{key}' for key in row.keys()])})" for row in data])
@ -80,6 +60,7 @@ class SQLAlchemyAdapter():
        async with self.engine.begin() as connection:
            await connection.execute(insert_query, data)
            await connection.close()
    async def get_data(self, table_name: str, filters: dict = None):
        async with self.engine.begin() as connection:
            query = f"SELECT * FROM {table_name}"
@ -113,11 +94,19 @@ class SQLAlchemyAdapter():
                print(f"Error dropping database tables: {e}")
    async def delete_database(self):
-        async with self.engine.begin() as connection:
+        try:
-            try:
+            if self.engine.dialect.name == "sqlite":
-                for table in Base.metadata.sorted_tables:
+                from cognee.infrastructure.files.storage import LocalStorage
-                    drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE')
+
-                    await connection.execute(drop_table_query)
+                LocalStorage.remove(self.db_path)
-                print("Database deleted successfully.")
+                self.db_path = None
-            except Exception as e:
+            else:
-                print(f"Error deleting database: {e}")
+                async with self.engine.begin() as connection:
                    for table in Base.metadata.sorted_tables:
                        drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE')
                        await connection.execute(drop_table_query)
        except Exception as e:
            print(f"Error deleting database: {e}")
        print("Database deleted successfully.")
--- a/cognee/infrastructure/databases/relational/sqlite/SqliteEngine.py
+++ b/cognee/infrastructure/databases/relational/sqlite/SqliteEngine.py
@ -1,82 +0,0 @@
 import os
 import asyncio
 from typing import Callable
 from sqlalchemy.inspection import inspect
 from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncEngine, AsyncSession, async_scoped_session
 from sqlalchemy.future import select
 from cognee.infrastructure.files.storage.LocalStorage import LocalStorage
 from ..DatabaseEngine import DatabaseEngine
 from ..ModelBase import Base
 from ..utils import with_rollback
 class SqliteEngine(DatabaseEngine):
    """Async SQLite engine for the database file ``db_path/db_name`` (``sqlite+aiosqlite`` driver)."""
    db_path: str = None
    db_name: str = None
    engine: AsyncEngine = None
    session_maker: Callable[[], async_scoped_session[AsyncSession]] = None
    is_db_done: bool = False
    def __init__(self, db_path: str, db_name: str):
        """Create the async engine and a factory for task-scoped sessions."""
        self.db_path = db_path
        self.db_name = db_name
        self.db_location = db_path + "/" + db_name
        self.engine = create_async_engine(
            f"sqlite+aiosqlite:///{self.db_location}",
            pool_recycle = 3600,
            echo = False
        )
        # Sessions are scoped to the asyncio task that requests them.
        self.session_maker = lambda: async_scoped_session(
            async_sessionmaker(
                bind = self.engine,
                class_ = AsyncSession
            ),
            scopefunc = asyncio.current_task
        )
    async def ensure_tables(self):
        """Create the database file and its tables when the file does not exist yet.

        Returns ``True`` after creating them and ``None`` when the file already existed.
        """
        if not self.database_exists(self.db_name):
            self.create_database(self.db_name)
            await self.create_tables()
            self.is_db_done = True
            return True
    def database_exists(self, db_name: str) -> bool:
        """Return whether the file ``db_path/db_name`` exists."""
        return os.path.exists(self.db_path + "/" + db_name)
    def create_database(self, db_name: str):
        """Ensure ``db_path`` exists and create (or truncate) the empty file ``db_path/db_name``."""
        LocalStorage.ensure_directory_exists(self.db_path)
        with open(self.db_path + "/" + db_name, mode = "w+", encoding = "utf-8") as file:
            file.write("")
    def drop_database(self, db_name: str):
        """Delete the engine's database file.

        NOTE(review): ``db_name`` is ignored; the file at ``self.db_location`` is always removed.
        """
        os.remove(self.db_location)
    async def table_exists(self, table_name: str) -> bool:
        """Return whether ``table_name`` exists in the database."""
        # inspect() cannot be applied to an AsyncEngine; the inspector has to be
        # created from the synchronous connection handed to run_sync().
        async with self.engine.connect() as connection:
            return await connection.run_sync(
                lambda sync_connection: inspect(sync_connection).has_table(table_name)
            )
    async def create_tables(self):
        """Create every table registered on ``Base.metadata``."""
        async with self.engine.begin() as connection:
            return await connection.run_sync(Base.metadata.create_all)
    async def create(self, data):
        """Add ``data`` to a session inside a ``with_rollback`` scope."""
        async with with_rollback(self.session_maker()) as session:
            session.add(data)
    async def query(self, query_term):
        """Execute ``query_term`` inside a ``with_rollback`` scope and return the result."""
        async with with_rollback(self.session_maker()) as session:
            return await session.execute(query_term)
    async def query_entity(self, entity):
        """Select rows of ``type(entity)`` whose ``id`` equals ``entity.id`` and return the result."""
        async with with_rollback(self.session_maker()) as session:
            return await session.execute(
                select(type(entity))
                    .where(type(entity).id == entity.id)
            )
    async def update(self, data_update_fn):
        """Call ``data_update_fn()`` synchronously, with no arguments, inside a ``with_rollback`` scope."""
        async with with_rollback(self.session_maker()):
            data_update_fn()
--- a/cognee/infrastructure/databases/relational/sqlite/init.py
+++ b/cognee/infrastructure/databases/relational/sqlite/init.py
--- a/cognee/infrastructure/databases/relational/utils/init.py
+++ b/cognee/infrastructure/databases/relational/utils/init.py
@ -1 +0,0 @@
 from .with_rollback import with_rollback
--- a/cognee/infrastructure/databases/relational/utils/with_rollback.py
+++ b/cognee/infrastructure/databases/relational/utils/with_rollback.py
@ -1,18 +0,0 @@
 import logging
 from contextlib import asynccontextmanager
 from sqlalchemy.ext.asyncio import async_scoped_session
 logger = logging.getLogger(__name__)
@asynccontextmanager
 async def with_rollback(session: async_scoped_session):
    """Provide a transactional scope around a series of operations."""
    try:
        # async with session.begin():
        yield session
        await session.commit()
        await session.remove()
    except Exception as exception:
        await session.rollback()
        logger.error("Session rolled back due to: %s", str(exception))
        raise exception
--- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
@ -4,7 +4,7 @@ import litellm
 from litellm import aembedding
 from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
-litellm.set_verbose = True
+litellm.set_verbose = False
 class LiteLLMEmbeddingEngine(EmbeddingEngine):
    api_key: str
--- a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py
+++ b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py
@ -106,11 +106,10 @@ class QDrantAdapter(VectorDBInterface):
        points = [convert_to_qdrant_point(point) for point in data_points]
        try:
-            result = await client.upload_points(
+            client.upload_points(
                collection_name = collection_name,
                points = points
            )
            return result
        except Exception as error:
            logger.error("Error uploading data points to Qdrant: %s", str(error))
            raise error
--- a/cognee/infrastructure/pipeline/models/Operation.py
+++ b/cognee/infrastructure/pipeline/models/Operation.py
@ -1,7 +1,7 @@
 from datetime import datetime
 from sqlalchemy.orm import Mapped, MappedColumn
-from sqlalchemy import Column, String, DateTime, ForeignKey, Enum, UUID, JSON
+from sqlalchemy import Column, DateTime, ForeignKey, Enum, JSON
-from cognee.infrastructure.databases.relational import ModelBase
+from cognee.infrastructure.databases.relational import Base, UUID
 class OperationType(Enum):
    MERGE_DATA = "MERGE_DATA"
@ -14,10 +14,10 @@ class OperationStatus(Enum):
    ERROR = "OPERATION_ERROR"
    CANCELLED = "OPERATION_CANCELLED"
-class Operation(ModelBase):
+class Operation(Base):
    __tablename__ = "operation"
-    id = Column(String, primary_key = True)
+    id = Column(UUID, primary_key = True)
    status = Column(Enum(OperationStatus))
    operation_type = Column(Enum(OperationType))
--- a/cognee/modules/data/methods/get_datasets_by_name.py
+++ b/cognee/modules/data/methods/get_datasets_by_name.py
@ -7,6 +7,8 @@ async def get_datasets_by_name(dataset_names: list[str], user_id: UUID) -> list[
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        if isinstance(dataset_names, str):
            dataset_names = [dataset_names]
        datasets = (await session.scalars(
            select(Dataset)
                .filter(Dataset.owner_id == user_id)
--- a/cognee/modules/data/models/Data.py
+++ b/cognee/modules/data/models/Data.py
@ -2,14 +2,14 @@ from uuid import uuid4
 from typing import List
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, String, DateTime, UUID
+from sqlalchemy import Column, String, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .DatasetData import DatasetData
 class Data(Base):
    __tablename__ = "data"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    name = Column(String)
    extension = Column(String)
--- a/cognee/modules/data/models/Dataset.py
+++ b/cognee/modules/data/models/Dataset.py
@ -2,14 +2,14 @@ from uuid import uuid4
 from typing import List
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, Text, DateTime, UUID
+from sqlalchemy import Column, Text, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .DatasetData import DatasetData
 class Dataset(Base):
    __tablename__ = "datasets"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    name = Column(Text)
--- a/cognee/modules/data/models/DatasetData.py
+++ b/cognee/modules/data/models/DatasetData.py
@ -1,11 +1,11 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, DateTime, UUID, ForeignKey
+from sqlalchemy import Column, DateTime, ForeignKey
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class DatasetData(Base):
    __tablename__ = "dataset_data"
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
-    dataset_id = Column(UUID(as_uuid = True), ForeignKey("datasets.id"), primary_key = True)
+    dataset_id = Column(UUID, ForeignKey("datasets.id"), primary_key = True)
-    data_id = Column(UUID(as_uuid = True), ForeignKey("data.id"), primary_key = True)
+    data_id = Column(UUID, ForeignKey("data.id"), primary_key = True)
--- a/cognee/modules/data/processing/document_types/AudioDocument.py
+++ b/cognee/modules/data/processing/document_types/AudioDocument.py
@ -6,18 +6,18 @@ from .Document import Document
 class AudioDocument(Document):
    type: str = "audio"
    title: str
-    file_path: str
+    raw_data_location: str
-    chunking_strategy:str
+    chunking_strategy: str
-    def __init__(self, id: UUID, title: str, file_path: str, chunking_strategy:str="paragraph"):
+    def __init__(self, id: UUID, title: str, raw_data_location: str, chunking_strategy:str="paragraph"):
        self.id = id or uuid5(NAMESPACE_OID, title)
        self.title = title
-        self.file_path = file_path
+        self.raw_data_location = raw_data_location
        self.chunking_strategy = chunking_strategy
    def read(self):
        # Transcribe the audio file
-        result = get_llm_client().create_transcript(self.file_path)
+        result = get_llm_client().create_transcript(self.raw_data_location)
        text = result.text
        chunker = TextChunker(self.id, get_text = lambda: text)
@ -30,5 +30,5 @@ class AudioDocument(Document):
            id=str(self.id),
            type=self.type,
            title=self.title,
-            file_path=self.file_path,
+            raw_data_location=self.raw_data_location,
        )
--- a/cognee/modules/data/processing/document_types/Document.py
+++ b/cognee/modules/data/processing/document_types/Document.py
@ -5,7 +5,7 @@ class Document(Protocol):
    id: UUID
    type: str
    title: str
-    file_path: str
+    raw_data_location: str
    def read(self) -> str:
        pass
--- a/cognee/modules/data/processing/document_types/ImageDocument.py
+++ b/cognee/modules/data/processing/document_types/ImageDocument.py
@ -7,16 +7,16 @@ from .Document import Document
 class ImageDocument(Document):
    type: str = "image"
    title: str
-    file_path: str
+    raw_data_location: str
-    def __init__(self, id: UUID, title: str, file_path: str):
+    def __init__(self, id: UUID, title: str, raw_data_location: str):
        self.id = id or uuid5(NAMESPACE_OID, title)
        self.title = title
-        self.file_path = file_path
+        self.raw_data_location = raw_data_location
    def read(self):
        # Transcribe the image file
-        result = get_llm_client().transcribe_image(self.file_path)
+        result = get_llm_client().transcribe_image(self.raw_data_location)
        text = result.choices[0].message.content
        chunker = TextChunker(self.id, get_text = lambda: text)
@ -29,5 +29,5 @@ class ImageDocument(Document):
            id=str(self.id),
            type=self.type,
            title=self.title,
-            file_path=self.file_path,
+            raw_data_location=self.raw_data_location,
        )
--- a/cognee/modules/data/processing/document_types/PdfDocument.py
+++ b/cognee/modules/data/processing/document_types/PdfDocument.py
@ -6,15 +6,15 @@ from .Document import Document
 class PdfDocument(Document):
    type: str = "pdf"
    title: str
-    file_path: str
+    raw_data_location: str
-    def __init__(self, id: UUID, title: str, file_path: str):
+    def __init__(self, id: UUID, title: str, raw_data_location: str):
        self.id = id or uuid5(NAMESPACE_OID, title)
        self.title = title
-        self.file_path = file_path
+        self.raw_data_location = raw_data_location
    def read(self) -> PdfReader:
-        file = PdfReader(self.file_path)
+        file = PdfReader(self.raw_data_location)
        def get_text():
            for page in file.pages:
@ -32,5 +32,5 @@ class PdfDocument(Document):
            id = str(self.id),
            type = self.type,
            title = self.title,
-            file_path = self.file_path,
+            raw_data_location = self.raw_data_location,
        )
--- a/cognee/modules/data/processing/document_types/TextDocument.py
+++ b/cognee/modules/data/processing/document_types/TextDocument.py
@ -5,16 +5,16 @@ from .Document import Document
 class TextDocument(Document):
    type: str = "text"
    title: str
-    file_path: str
+    raw_data_location: str
-    def __init__(self, id: UUID, title: str, file_path: str):
+    def __init__(self, id: UUID, title: str, raw_data_location: str):
        self.id = id or uuid5(NAMESPACE_OID, title)
        self.title = title
-        self.file_path = file_path
+        self.raw_data_location = raw_data_location
    def read(self):
        def get_text():
-            with open(self.file_path, mode = "r", encoding = "utf-8") as file:
+            with open(self.raw_data_location, mode = "r", encoding = "utf-8") as file:
                while True:
                    text = file.read(1024)
@ -34,5 +34,5 @@ class TextDocument(Document):
            id = str(self.id),
            type = self.type,
            title = self.title,
-            file_path = self.file_path,
+            raw_data_location = self.raw_data_location,
        )
--- a/cognee/modules/pipelines/models/Pipeline.py
+++ b/cognee/modules/pipelines/models/Pipeline.py
@ -1,14 +1,14 @@
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, UUID, DateTime, String, Text
+from sqlalchemy import Column, DateTime, String, Text
 from sqlalchemy.orm import relationship, Mapped
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .PipelineTask import PipelineTask
 class Pipeline(Base):
    __tablename__ = "pipelines"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    name = Column(String)
    description = Column(Text, nullable = True)
--- a/cognee/modules/pipelines/models/PipelineRun.py
+++ b/cognee/modules/pipelines/models/PipelineRun.py
@ -1,16 +1,16 @@
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, UUID, DateTime, String, JSON
+from sqlalchemy import Column, DateTime, String, JSON
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class PipelineRun(Base):
    __tablename__ = "pipeline_runs"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
    status = Column(String)
-    run_id = Column(UUID(as_uuid = True), index = True)
+    run_id = Column(UUID, index = True)
    run_info = Column(JSON)
--- a/cognee/modules/pipelines/models/PipelineTask.py
+++ b/cognee/modules/pipelines/models/PipelineTask.py
@ -1,11 +1,11 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, DateTime, UUID, ForeignKey
+from sqlalchemy import Column, DateTime, ForeignKey
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class PipelineTask(Base):
    __tablename__ = "pipeline_task"
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
-    pipeline_id = Column("pipeline", UUID(as_uuid = True), ForeignKey("pipeline.id"), primary_key = True)
+    pipeline_id = Column("pipeline", UUID, ForeignKey("pipelines.id"), primary_key = True)
-    task_id = Column("task", UUID(as_uuid = True), ForeignKey("task.id"), primary_key = True)
+    task_id = Column("task", UUID, ForeignKey("tasks.id"), primary_key = True)
--- a/cognee/modules/pipelines/models/Task.py
+++ b/cognee/modules/pipelines/models/Task.py
@ -1,14 +1,14 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, String, DateTime, UUID, Text
+from sqlalchemy import Column, String, DateTime, Text
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .PipelineTask import PipelineTask
 class Task(Base):
    __tablename__ = "tasks"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    name = Column(String)
    description = Column(Text, nullable = True)
--- a/cognee/modules/pipelines/models/TaskRun.py
+++ b/cognee/modules/pipelines/models/TaskRun.py
@ -1,12 +1,12 @@
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, UUID, DateTime, String, JSON
+from sqlalchemy import Column, DateTime, String, JSON
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class TaskRun(Base):
    __tablename__ = "task_runs"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    task_name = Column(String)
--- a/cognee/modules/users/models/ACL.py
+++ b/cognee/modules/users/models/ACL.py
@ -1,20 +1,20 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .ACLResources import ACLResources
 class ACL(Base):
    __tablename__ = "acls"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
-    principal_id = Column(UUID(as_uuid = True), ForeignKey("principals.id"))
+    principal_id = Column(UUID, ForeignKey("principals.id"))
-    permission_id = Column(UUID(as_uuid = True), ForeignKey("permissions.id"))
+    permission_id = Column(UUID, ForeignKey("permissions.id"))
    principal = relationship("Principal")
    permission = relationship("Permission")
--- a/cognee/modules/users/models/ACLResources.py
+++ b/cognee/modules/users/models/ACLResources.py
@ -1,11 +1,11 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, ForeignKey, UUID, DateTime
+from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class ACLResources(Base):
    __tablename__ = "acl_resources"
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
-    acl_id = Column(UUID(as_uuid = True), ForeignKey("acls.id"), primary_key = True)
+    acl_id = Column(UUID, ForeignKey("acls.id"), primary_key = True)
-    resource_id = Column(UUID(as_uuid = True), ForeignKey("resources.id"), primary_key = True)
+    resource_id = Column(UUID, ForeignKey("resources.id"), primary_key = True)
--- a/cognee/modules/users/models/Group.py
+++ b/cognee/modules/users/models/Group.py
@ -1,12 +1,13 @@
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, String, ForeignKey, UUID
+from sqlalchemy import Column, String, ForeignKey
 from cognee.infrastructure.databases.relational import UUID
 from .Principal import Principal
 from .UserGroup import UserGroup
 class Group(Principal):
    __tablename__ = "groups"
-    id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True)
+    id = Column(UUID, ForeignKey("principals.id"), primary_key = True)
    name = Column(String, unique = True, nullable = False, index = True)
--- a/cognee/modules/users/models/Permission.py
+++ b/cognee/modules/users/models/Permission.py
@ -1,8 +1,8 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 # from sqlalchemy.orm import relationship
-from sqlalchemy import Column, DateTime, UUID, String
+from sqlalchemy import Column, DateTime, String
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class Permission(Base):
    __tablename__ = "permissions"
--- a/cognee/modules/users/models/Principal.py
+++ b/cognee/modules/users/models/Principal.py
@ -1,12 +1,12 @@
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, String, DateTime, UUID
+from sqlalchemy import Column, String, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class Principal(Base):
    __tablename__ = "principals"
-    id = Column(UUID(as_uuid = True), primary_key = True, index = True, default = uuid4)
+    id = Column(UUID, primary_key = True, index = True, default = uuid4)
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
--- a/cognee/modules/users/models/Resource.py
+++ b/cognee/modules/users/models/Resource.py
@ -1,18 +1,18 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship
-from sqlalchemy import Column, DateTime, UUID
+from sqlalchemy import Column, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 from .ACLResources import ACLResources
 class Resource(Base):
    __tablename__ = "resources"
-    id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4)
+    id = Column(UUID, primary_key = True, default = uuid4)
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
-    resource_id = Column(UUID(as_uuid = True), nullable = False)
+    resource_id = Column(UUID, nullable = False)
    acls = relationship("ACL", secondary = ACLResources.__tablename__, back_populates = "resources")
--- a/cognee/modules/users/models/User.py
+++ b/cognee/modules/users/models/User.py
@ -1,14 +1,15 @@
 from uuid import UUID as uuid_UUID
-from sqlalchemy import ForeignKey, UUID, Column
+from sqlalchemy import ForeignKey, Column
 from sqlalchemy.orm import relationship, Mapped
 from fastapi_users.db import SQLAlchemyBaseUserTableUUID
 from cognee.infrastructure.databases.relational import UUID
 from .Principal import Principal
 from .UserGroup import UserGroup
 class User(SQLAlchemyBaseUserTableUUID, Principal):
    __tablename__ = "users"
-    id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True)
+    id = Column(UUID, ForeignKey("principals.id"), primary_key = True)
    groups: Mapped[list["Group"]] = relationship(
        secondary = UserGroup.__tablename__,
--- a/cognee/modules/users/models/UserGroup.py
+++ b/cognee/modules/users/models/UserGroup.py
@ -1,11 +1,11 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base
+from cognee.infrastructure.databases.relational import Base, UUID
 class UserGroup(Base):
    __tablename__ = "user_groups"
    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
-    user_id = Column(UUID(as_uuid = True), ForeignKey("users.id"), primary_key = True)
+    user_id = Column(UUID, ForeignKey("users.id"), primary_key = True)
-    group_id = Column(UUID(as_uuid = True), ForeignKey("groups.id"), primary_key = True)
+    group_id = Column(UUID, ForeignKey("groups.id"), primary_key = True)
--- a/cognee/tasks/chunking/chunk_by_word.py
+++ b/cognee/tasks/chunking/chunk_by_word.py
@ -24,7 +24,7 @@ def chunk_by_word(data: str):
            while next_character is not None and (re.match(paragraph_endings, next_character) or next_character == " "):
                j += 1
                next_character = data[j] if j < len(data) else None
-            if next_character.isupper():
+            if next_character and next_character.isupper():
                return True
            return False
--- a/cognee/tasks/classify_documents/classify_documents.py
+++ b/cognee/tasks/classify_documents/classify_documents.py
@ -3,10 +3,10 @@ from cognee.modules.data.processing.document_types import Document, PdfDocument,
 def classify_documents(data_documents: list[Data]) -> list[Document]:
    documents = [
-        PdfDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "pdf" else
+        PdfDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
-        AudioDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "audio" else
+        AudioDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
-        ImageDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location) if data_item.extension == "image" else
+        ImageDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
-        TextDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", file_path=data_item.raw_data_location)
+        TextDocument(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
        for data_item in data_documents
    ]
--- a/cognee/tasks/infer_data_ontology/infer_data_ontology.py
+++ b/cognee/tasks/infer_data_ontology/infer_data_ontology.py
@ -89,7 +89,7 @@ class OntologyEngine:
            chunk_strategy = chunk_config.chunk_strategy
            for base_file in documents:
-                with open(base_file.file_path, "rb") as file:
+                with open(base_file.raw_data_location, "rb") as file:
                    try:
                        file_type = guess_file_type(file)
                        text = extract_text_from_file(file, file_type)
@ -175,7 +175,7 @@ async def infer_data_ontology(documents, ontology_model = KnowledgeGraph, root_n
        ontology_engine = OntologyEngine()
        root_node_id = await ontology_engine.add_graph_ontology(documents = documents)
    else:
-        graph_engine = get_graph_engine()
+        graph_engine = await get_graph_engine()
        await add_model_class_to_graph(ontology_model, graph_engine)
    yield (documents, root_node_id)
--- a/notebooks/cognee_demo_1.5.ipynb
+++ b/notebooks/cognee_demo_1.5.ipynb
@ -0,0 +1,512 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "958375a6ffc0c2e4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:47.336283Z",
     "start_time": "2024-09-20T14:02:43.652444Z"
    }
   },
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import logging\n",
    "from typing import Union\n",
    "\n",
    "from cognee.modules.cognify.config import get_cognify_config\n",
    "from cognee.shared.data_models import KnowledgeGraph\n",
    "from cognee.modules.data.models import Dataset, Data\n",
    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
    "from cognee.modules.data.methods import get_datasets, get_datasets_by_name\n",
    "from cognee.modules.pipelines.tasks.Task import Task\n",
    "from cognee.modules.pipelines import run_tasks, run_tasks_parallel\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.modules.users.methods import get_default_user\n",
    "from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status\n",
    "from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status\n",
    "from cognee.tasks import chunk_extract_summary, \\\n",
    "    chunk_naive_llm_classifier, \\\n",
    "    chunk_remove_disconnected, \\\n",
    "    infer_data_ontology, \\\n",
    "    save_chunks_to_store, \\\n",
    "    chunk_update_check, \\\n",
    "    chunks_into_graph, \\\n",
    "    source_documents_to_chunks, \\\n",
    "    check_permissions_on_documents, \\\n",
    "    classify_documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "df16431d0f48b006",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:48.519686Z",
     "start_time": "2024-09-20T14:02:48.515589Z"
    }
   },
   "outputs": [],
   "source": [
    "job_position = \"\"\"Senior Data Scientist (Machine Learning)\n",
    "\n",
    "Company: TechNova Solutions\n",
    "Location: San Francisco, CA\n",
    "\n",
    "Job Description:\n",
    "\n",
    "TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.\n",
    "\n",
    "Responsibilities:\n",
    "\n",
    "Develop and implement advanced machine learning algorithms and models.\n",
    "Analyze large, complex datasets to extract meaningful patterns and insights.\n",
    "Collaborate with cross-functional teams to integrate predictive models into products.\n",
    "Stay updated with the latest advancements in machine learning and data science.\n",
    "Mentor junior data scientists and provide technical guidance.\n",
    "Qualifications:\n",
    "\n",
    "Master’s or Ph.D. in Data Science, Computer Science, Statistics, or a related field.\n",
    "5+ years of experience in data science and machine learning.\n",
    "Proficient in Python, R, and SQL.\n",
    "Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).\n",
    "Strong problem-solving skills and attention to detail.\n",
    "Candidate CVs\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9086abf3af077ab4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:49.120838Z",
     "start_time": "2024-09-20T14:02:49.118294Z"
    }
   },
   "outputs": [],
   "source": [
    "job_1 = \"\"\"\n",
    "CV 1: Relevant\n",
    "Name: Dr. Emily Carter\n",
    "Contact Information:\n",
    "\n",
    "Email: emily.carter@example.com\n",
    "Phone: (555) 123-4567\n",
    "Summary:\n",
    "\n",
    "Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.\n",
    "\n",
    "Education:\n",
    "\n",
    "Ph.D. in Computer Science, Stanford University (2014)\n",
    "B.S. in Mathematics, University of California, Berkeley (2010)\n",
    "Experience:\n",
    "\n",
    "Senior Data Scientist, InnovateAI Labs (2016 – Present)\n",
    "Led a team in developing machine learning models for natural language processing applications.\n",
    "Implemented deep learning algorithms that improved prediction accuracy by 25%.\n",
    "Collaborated with cross-functional teams to integrate models into cloud-based platforms.\n",
    "Data Scientist, DataWave Analytics (2014 – 2016)\n",
    "Developed predictive models for customer segmentation and churn analysis.\n",
    "Analyzed large datasets using Hadoop and Spark frameworks.\n",
    "Skills:\n",
    "\n",
    "Programming Languages: Python, R, SQL\n",
    "Machine Learning: TensorFlow, Keras, Scikit-Learn\n",
    "Big Data Technologies: Hadoop, Spark\n",
    "Data Visualization: Tableau, Matplotlib\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a9de0cc07f798b7f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:49.675003Z",
     "start_time": "2024-09-20T14:02:49.671615Z"
    }
   },
   "outputs": [],
   "source": [
    "job_2 = \"\"\"\n",
    "CV 2: Relevant\n",
    "Name: Michael Rodriguez\n",
    "Contact Information:\n",
    "\n",
    "Email: michael.rodriguez@example.com\n",
    "Phone: (555) 234-5678\n",
    "Summary:\n",
    "\n",
    "Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.\n",
    "\n",
    "Education:\n",
    "\n",
    "M.S. in Data Science, Carnegie Mellon University (2013)\n",
    "B.S. in Computer Science, University of Michigan (2011)\n",
    "Experience:\n",
    "\n",
    "Senior Data Scientist, Alpha Analytics (2017 – Present)\n",
    "Developed machine learning models to optimize marketing strategies.\n",
    "Reduced customer acquisition cost by 15% through predictive modeling.\n",
    "Data Scientist, TechInsights (2013 – 2017)\n",
    "Analyzed user behavior data to improve product features.\n",
    "Implemented A/B testing frameworks to evaluate product changes.\n",
    "Skills:\n",
    "\n",
    "Programming Languages: Python, Java, SQL\n",
    "Machine Learning: Scikit-Learn, XGBoost\n",
    "Data Visualization: Seaborn, Plotly\n",
    "Databases: MySQL, MongoDB\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "185ff1c102d06111",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:50.286828Z",
     "start_time": "2024-09-20T14:02:50.284369Z"
    }
   },
   "outputs": [],
   "source": [
    "job_3 = \"\"\"\n",
    "CV 3: Relevant\n",
    "Name: Sarah Nguyen\n",
    "Contact Information:\n",
    "\n",
    "Email: sarah.nguyen@example.com\n",
    "Phone: (555) 345-6789\n",
    "Summary:\n",
    "\n",
    "Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.\n",
    "\n",
    "Education:\n",
    "\n",
    "M.S. in Statistics, University of Washington (2014)\n",
    "B.S. in Applied Mathematics, University of Texas at Austin (2012)\n",
    "Experience:\n",
    "\n",
    "Data Scientist, QuantumTech (2016 – Present)\n",
    "Designed and implemented machine learning algorithms for financial forecasting.\n",
    "Improved model efficiency by 20% through algorithm optimization.\n",
    "Junior Data Scientist, DataCore Solutions (2014 – 2016)\n",
    "Assisted in developing predictive models for supply chain optimization.\n",
    "Conducted data cleaning and preprocessing on large datasets.\n",
    "Skills:\n",
    "\n",
    "Programming Languages: Python, R\n",
    "Machine Learning Frameworks: PyTorch, Scikit-Learn\n",
    "Statistical Analysis: SAS, SPSS\n",
    "Cloud Platforms: AWS, Azure\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d55ce4c58f8efb67",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:50.950343Z",
     "start_time": "2024-09-20T14:02:50.946378Z"
    }
   },
   "outputs": [],
   "source": [
    "job_4 = \"\"\"\n",
    "CV 4: Not Relevant\n",
    "Name: David Thompson\n",
    "Contact Information:\n",
    "\n",
    "Email: david.thompson@example.com\n",
    "Phone: (555) 456-7890\n",
    "Summary:\n",
    "\n",
    "Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.\n",
    "\n",
    "Education:\n",
    "\n",
    "B.F.A. in Graphic Design, Rhode Island School of Design (2012)\n",
    "Experience:\n",
    "\n",
    "Senior Graphic Designer, CreativeWorks Agency (2015 – Present)\n",
    "Led design projects for clients in various industries.\n",
    "Created branding materials that increased client engagement by 30%.\n",
    "Graphic Designer, Visual Innovations (2012 – 2015)\n",
    "Designed marketing collateral, including brochures, logos, and websites.\n",
    "Collaborated with the marketing team to develop cohesive brand strategies.\n",
    "Skills:\n",
    "\n",
    "Design Software: Adobe Photoshop, Illustrator, InDesign\n",
    "Web Design: HTML, CSS\n",
    "Specialties: Branding and Identity, Typography\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ca4ecc32721ad332",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:51.548191Z",
     "start_time": "2024-09-20T14:02:51.545520Z"
    }
   },
   "outputs": [],
   "source": [
    "job_5 = \"\"\"\n",
    "CV 5: Not Relevant\n",
    "Name: Jessica Miller\n",
    "Contact Information:\n",
    "\n",
    "Email: jessica.miller@example.com\n",
    "Phone: (555) 567-8901\n",
    "Summary:\n",
    "\n",
    "Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.\n",
    "\n",
    "Education:\n",
    "\n",
    "B.A. in Business Administration, University of Southern California (2010)\n",
    "Experience:\n",
    "\n",
    "Sales Manager, Global Enterprises (2015 – Present)\n",
    "Managed a sales team of 15 members, achieving a 20% increase in annual revenue.\n",
    "Developed sales strategies that expanded customer base by 25%.\n",
    "Sales Representative, Market Leaders Inc. (2010 – 2015)\n",
    "Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.\n",
    "Skills:\n",
    "\n",
    "Sales Strategy and Planning\n",
    "Team Leadership and Development\n",
    "CRM Software: Salesforce, Zoho\n",
    "Negotiation and Relationship Building\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "904df61ba484a8e5",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:54.243987Z",
     "start_time": "2024-09-20T14:02:52.498195Z"
    }
   },
   "outputs": [],
   "source": [
    "import cognee\n",
    "from os import listdir, path\n",
    "\n",
    "data_path = path.abspath(\".data\")\n",
    "\n",
    "results = await cognee.add([job_1, job_2,job_3,job_4,job_5,job_position], \"example\")\n",
    "\n",
    "for result in results:\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6f9b564de121713d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:55.564445Z",
     "start_time": "2024-09-20T14:02:55.562784Z"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8911f8bd4f8c440a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:56.714408Z",
     "start_time": "2024-09-20T14:02:56.711812Z"
    }
   },
   "outputs": [],
   "source": [
    "# from enum import Enum, auto\n",
    "# from typing import Optional, List, Union, Dict, Any\n",
    "# from pydantic import BaseModel, Field\n",
    "# \n",
    "# class Node(BaseModel):\n",
    "#     \"\"\"Node in a knowledge graph.\"\"\"\n",
    "#     id: str\n",
    "#     name: str\n",
    "#     type: str\n",
    "#     description: str\n",
    "#     properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n",
    "# \n",
    "# class Edge(BaseModel):\n",
    "#     \"\"\"Edge in a knowledge graph.\"\"\"\n",
    "#     source_node_id: str\n",
    "#     target_node_id: str\n",
    "#     relationship_name: str\n",
    "#     properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n",
    "# \n",
    "# class KnowledgeGraph(BaseModel):\n",
    "#     \"\"\"Knowledge graph.\"\"\"\n",
    "#     nodes: List[Node] = Field(..., default_factory=list)\n",
    "#     edges: List[Edge] = Field(..., default_factory=list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7c431fdef4921ae0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:57.925667Z",
     "start_time": "2024-09-20T14:02:57.922353Z"
    }
   },
   "outputs": [],
   "source": [
    "async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
    "    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
    "\n",
    "    try:\n",
    "\n",
    "        root_node_id = None\n",
    "\n",
    "        tasks = [\n",
    "            Task(classify_documents),\n",
    "            Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
    "            Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),\n",
    "            Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as nodes in the graph db, extract text chunks based on the document type\n",
    "            Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = \"entities\", task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks and attach them to chunk nodes\n",
    "            Task(chunk_update_check, collection_name = \"chunks\"), # Find all affected chunks, so we don't process unchanged chunks\n",
    "            Task(\n",
    "                save_chunks_to_store,\n",
    "                collection_name = \"chunks\",\n",
    "            ), \n",
    "            Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
    "        ]\n",
    "\n",
    "        pipeline = run_tasks(tasks, data_documents)\n",
    "\n",
    "        async for result in pipeline:\n",
    "            print(result)\n",
    "    except Exception as error:\n",
    "        raise error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0a91b99c6215e09",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-09-20T14:02:58.905774Z",
     "start_time": "2024-09-20T14:02:58.625915Z"
    }
   },
   "outputs": [],
   "source": [
    "user = await get_default_user()\n",
    "datasets = await get_datasets_by_name([\"example\"], user.id)\n",
    "await run_cognify_pipeline(datasets[0], user)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "080389e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from cognee.shared.utils import render_graph\n",
    "from cognee.infrastructure.databases.graph import get_graph_engine\n",
    "import graphistry\n",
    "\n",
    "# # Setting an environment variable\n",
    "# os.environ[\"GRAPHISTRY_USERNAME\"] = placeholder\n",
    "# os.environ[\"GRAPHISTRY_PASSWORD\"] = placeholder\n",
    "\n",
    "\n",
    "graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n",
    "\n",
    "graph_engine = await get_graph_engine()\n",
    "\n",
    "graph_url = await render_graph(graph_engine.graph)\n",
    "print(graph_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5e7dfc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "async def search(\n",
    "    vector_engine,\n",
    "    collection_name: str,\n",
    "    query_text: str = None,\n",
    "):\n",
    "    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]\n",
    "\n",
    "    connection = await vector_engine.get_connection()\n",
    "    collection = await connection.open_table(collection_name)\n",
    "\n",
    "    results = await collection.vector_search(query_vector).limit(10).to_pandas()\n",
    "\n",
    "    result_values = list(results.to_dict(\"index\").values())\n",
    "\n",
    "    return [dict(\n",
    "        id = str(result[\"id\"]),\n",
    "        payload = result[\"payload\"],\n",
    "        score = result[\"_distance\"],\n",
    "    ) for result in result_values]\n",
    "\n",
    "\n",
    "from cognee.infrastructure.databases.vector import get_vector_engine\n",
    "\n",
    "vector_engine = get_vector_engine()\n",
    "results = await search(vector_engine, \"entities\", \"sarah.nguyen@example.com\")\n",
    "for result in results:\n",
    "    print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -38,12 +38,10 @@ greenlet = "^3.0.3"
 ruff = "^0.2.2"
 filetype = "^1.2.0"
 nltk = "^3.8.1"
-dlt = {extras = ["postgres"], version = "^0.5.2"}
+dlt = {extras = ["sqlalchemy"], version = "^1.0.0"}
 duckdb = {version = "^0.10.0", extras = ["dlt"]}
 overrides = "^7.7.0"
 aiofiles = "^23.2.1"
 qdrant-client = "^1.9.0"
 duckdb-engine = "0.13.0"
 graphistry = "^0.33.5"
 tenacity = "^8.2.3"
 weaviate-client = "4.6.7"
@ -75,14 +73,12 @@ asyncpg = "^0.29.0"
 [tool.poetry.extras]
 duckdb = ["duckdb"]
 filesystem = ["s3fs", "botocore"]
 motherduck = ["duckdb"]
 cli = ["pipdeptree", "cron-descriptor"]
 weaviate = ["weaviate-client"]
 qdrant = ["qdrant-client"]
-neo4j = ["neo4j", "py2neo"]
+neo4j = ["neo4j"]
-notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
+notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.0"