Merge branch 'dev' into fix/remove-double-quotes-from-llmconfig-str-params

2025-12-01 10:09:53 +01:00 · 2025-12-01 10:09:53 +01:00 · 5ce1af8cc0
commit 5ce1af8cc0
parent 05c984f98f 508165e883
129 changed files with 4446 additions and 436 deletions
--- a/.env.template
+++ b/.env.template
@ -21,6 +21,10 @@ LLM_PROVIDER="openai"
 LLM_ENDPOINT=""
 LLM_API_VERSION=""
 LLM_MAX_TOKENS="16384"
+# Instructor's modes determine how structured data is requested from and extracted from LLM responses
+# You can change this type (i.e. mode) via this env variable
+# Each LLM has its own default value, e.g. gpt-5 models have "json_schema_mode"
+LLM_INSTRUCTOR_MODE=""

 EMBEDDING_PROVIDER="openai"
 EMBEDDING_MODEL="openai/text-embedding-3-large"
--- a/.github/actions/cognee_setup/action.yml
+++ b/.github/actions/cognee_setup/action.yml
@ -42,3 +42,8 @@ runs:
          done
        fi
        uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS
+
+    - name: Add telemetry identifier for telemetry test and in case telemetry is enabled by accident
+      shell: bash
+      run: |
+        echo "test-machine" > .anon_id
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@ -6,6 +6,14 @@ Please provide a clear, human-generated description of the changes in this PR.
 DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning.
 -->

+## Acceptance Criteria
+<!--
+* Key requirements to the new feature or modification;
+* Proof that the changes work and meet the requirements;
+* Include instructions on how to verify the changes. Describe how to test it locally;
+* Proof that it's sufficiently tested.
+-->
+
 ## Type of Change
 <!-- Please check the relevant option -->
 - [ ] Bug fix (non-breaking change that fixes an issue)
--- a/.github/workflows/basic_tests.yml
+++ b/.github/workflows/basic_tests.yml
@ -75,6 +75,7 @@ jobs:
    name: Run Unit Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -104,6 +105,7 @@ jobs:
    name: Run Integration Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -132,6 +134,7 @@ jobs:
    name: Run Simple Examples
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -161,6 +164,7 @@ jobs:
    name: Run Simple Examples BAML
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      STRUCTURED_OUTPUT_FRAMEWORK: "BAML"
      BAML_LLM_PROVIDER: openai
      BAML_LLM_MODEL: ${{ secrets.OPENAI_MODEL }}
@ -198,6 +202,7 @@ jobs:
    name: Run Basic Graph Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
--- a/.github/workflows/cli_tests.yml
+++ b/.github/workflows/cli_tests.yml
@ -39,6 +39,7 @@ jobs:
    name: CLI Unit Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -66,6 +67,7 @@ jobs:
    name: CLI Integration Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -93,6 +95,7 @@ jobs:
    name: CLI Functionality Tests
    runs-on: ubuntu-22.04
    env:
+      ENV: 'dev'
      LLM_PROVIDER: openai
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
--- a/.github/workflows/db_examples_tests.yml
+++ b/.github/workflows/db_examples_tests.yml
@ -60,7 +60,7 @@ jobs:

      - name: Run Neo4j Example
        env:
-          ENV: dev
+          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
@ -95,7 +95,7 @@ jobs:

      - name: Run Kuzu Example
        env:
-          ENV: dev
+          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
@ -141,7 +141,7 @@ jobs:

      - name: Run PGVector Example
        env:
-          ENV: dev
+          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@ -226,7 +226,7 @@ jobs:
      - name: Dependencies already installed
        run: echo "Dependencies already installed in setup"

-      - name: Run parallel databases test
+      - name: Run permissions test
        env:
          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
@ -239,6 +239,31 @@ jobs:
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./cognee/tests/test_permissions.py

+  test-multi-tenancy:
+    name: Test multi tenancy with different situations in Cognee
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run multi tenancy test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: uv run python ./cognee/tests/test_multi_tenancy.py
+
  test-graph-edges:
    name: Test graph edge ingestion
    runs-on: ubuntu-22.04
@ -308,7 +333,7 @@ jobs:
          python-version: '3.11.x'
          extra-dependencies: "postgres redis"

-      - name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres)
+      - name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres/Redis)
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
@ -321,6 +346,7 @@ jobs:
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          GRAPH_DATABASE_PROVIDER: 'kuzu'
          CACHING: true
+          CACHE_BACKEND: 'redis'
          SHARED_KUZU_LOCK: true
          DB_PROVIDER: 'postgres'
          DB_NAME: 'cognee_db'
@ -386,8 +412,8 @@ jobs:
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./cognee/tests/test_feedback_enrichment.py

-  run_conversation_sessions_test:
-    name: Conversation sessions test
+  run_conversation_sessions_test_redis:
+    name: Conversation sessions test (Redis)
    runs-on: ubuntu-latest
    defaults:
      run:
@ -427,7 +453,60 @@ jobs:
          python-version: '3.11.x'
          extra-dependencies: "postgres redis"

-      - name: Run Conversation session tests
+      - name: Run Conversation session tests (Redis)
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          GRAPH_DATABASE_PROVIDER: 'kuzu'
+          CACHING: true
+          CACHE_BACKEND: 'redis'
+          DB_PROVIDER: 'postgres'
+          DB_NAME: 'cognee_db'
+          DB_HOST: '127.0.0.1'
+          DB_PORT: 5432
+          DB_USERNAME: cognee
+          DB_PASSWORD: cognee
+        run: uv run python ./cognee/tests/test_conversation_history.py
+
+  run_conversation_sessions_test_fs:
+    name: Conversation sessions test (FS)
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+    services:
+      postgres:
+        image: pgvector/pgvector:pg17
+        env:
+          POSTGRES_USER: cognee
+          POSTGRES_PASSWORD: cognee
+          POSTGRES_DB: cognee_db
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+          extra-dependencies: "postgres"
+
+      - name: Run Conversation session tests (FS)
        env:
          ENV: dev
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
@ -440,6 +519,7 @@ jobs:
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          GRAPH_DATABASE_PROVIDER: 'kuzu'
          CACHING: true
+          CACHE_BACKEND: 'fs'
          DB_PROVIDER: 'postgres'
          DB_NAME: 'cognee_db'
          DB_HOST: '127.0.0.1'
@ -447,44 +527,3 @@ jobs:
          DB_USERNAME: cognee
          DB_PASSWORD: cognee
        run: uv run python ./cognee/tests/test_conversation_history.py
-
-  test-load:
-    name: Test Load
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-          extra-dependencies: "aws"
-
-      - name: Set File Descriptor Limit
-        run: sudo prlimit --pid $$ --nofile=4096:4096
-
-      - name: Verify File Descriptor Limit
-        run: ulimit -n
-
-      - name: Dependencies already installed
-        run: echo "Dependencies already installed in setup"
-
-      - name: Run Load Test
-        env:
-          ENV: 'dev'
-          ENABLE_BACKEND_ACCESS_CONTROL: True
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          STORAGE_BACKEND: s3
-          AWS_REGION: eu-west-1
-          AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
-        run: uv run python ./cognee/tests/test_load.py
--- a/.github/workflows/examples_tests.yml
+++ b/.github/workflows/examples_tests.yml
@ -21,6 +21,7 @@ jobs:

      - name: Run Multimedia Example
        env:
+          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: uv run python ./examples/python/multimedia_example.py
@ -40,6 +41,7 @@ jobs:

      - name: Run Evaluation Framework Example
        env:
+          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
@ -69,6 +71,7 @@ jobs:

      - name: Run Descriptive Graph Metrics Example
        env:
+          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
@ -99,6 +102,7 @@ jobs:

      - name: Run Dynamic Steps Tests
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -124,6 +128,7 @@ jobs:

      - name: Run Temporal Example
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -149,6 +154,7 @@ jobs:

      - name: Run Ontology Demo Example
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -174,6 +180,7 @@ jobs:

      - name: Run Agentic Reasoning Example
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -199,6 +206,7 @@ jobs:

      - name: Run Memify Tests
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -224,6 +232,7 @@ jobs:

      - name: Run Custom Pipeline Example
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -249,6 +258,7 @@ jobs:

      - name: Run Memify Tests
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
@ -274,6 +284,7 @@ jobs:

      - name: Run Docling Test
        env:
+          ENV: 'dev'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
--- a/.github/workflows/load_tests.yml
+++ b/.github/workflows/load_tests.yml
@ -0,0 +1,70 @@
+name: Load tests
+
+permissions:
+  contents: read
+
+on:
+  workflow_dispatch:
+  workflow_call:
+    secrets:
+      LLM_MODEL:
+        required: true
+      LLM_ENDPOINT:
+        required: true
+      LLM_API_KEY:
+        required: true
+      LLM_API_VERSION:
+        required: true
+      EMBEDDING_MODEL:
+        required: true
+      EMBEDDING_ENDPOINT:
+        required: true
+      EMBEDDING_API_KEY:
+        required: true
+      EMBEDDING_API_VERSION:
+        required: true
+      OPENAI_API_KEY:
+        required: true
+      AWS_ACCESS_KEY_ID:
+        required: true
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+
+jobs:
+  test-load:
+    name: Test Load
+    runs-on: ubuntu-22.04
+    timeout-minutes: 60
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+          extra-dependencies: "aws"
+
+      - name: Verify File Descriptor Limit
+        run: ulimit -n
+
+      - name: Run Load Test
+        env:
+          ENV: 'dev'
+          ENABLE_BACKEND_ACCESS_CONTROL: True
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          STORAGE_BACKEND: s3
+          AWS_REGION: eu-west-1
+          AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
+        run: uv run python ./cognee/tests/test_load.py
+  
+    
--- a/.github/workflows/release_test.yml
+++ b/.github/workflows/release_test.yml
@ -0,0 +1,17 @@
+# Long-running, heavy and resource-consuming tests for release validation
+name: Release Test Workflow
+
+permissions:
+  contents: read
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  load-tests:
+    name: Load Tests
+    uses: ./.github/workflows/load_tests.yml
+    secrets: inherit
--- a/.github/workflows/weighted_edges_tests.yml
+++ b/.github/workflows/weighted_edges_tests.yml
@ -2,7 +2,7 @@ name: Weighted Edges Tests

 on:
  push:
-    branches: [ main, weighted_edges ]
+    branches: [ main, dev, weighted_edges ]
    paths:
      - 'cognee/modules/graph/utils/get_graph_from_model.py'
      - 'cognee/infrastructure/engine/models/Edge.py'
@ -10,7 +10,7 @@ on:
      - 'examples/python/weighted_edges_example.py'
      - '.github/workflows/weighted_edges_tests.yml'
  pull_request:
-    branches: [ main ]
+    branches: [ main, dev ]
    paths:
      - 'cognee/modules/graph/utils/get_graph_from_model.py'
      - 'cognee/infrastructure/engine/models/Edge.py'
@ -32,7 +32,7 @@ jobs:
    env:
      LLM_PROVIDER: openai
      LLM_MODEL: gpt-5-mini
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:
      - name: Check out repository
@ -67,14 +67,13 @@ jobs:
    env:
      LLM_PROVIDER: openai
      LLM_MODEL: gpt-5-mini
-      LLM_ENDPOINT: https://api.openai.com/v1/
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_ENDPOINT: https://api.openai.com/v1
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY  }}
      LLM_API_VERSION: "2024-02-01"
-      EMBEDDING_PROVIDER: openai
-      EMBEDDING_MODEL: text-embedding-3-small
-      EMBEDDING_ENDPOINT: https://api.openai.com/v1/
-      EMBEDDING_API_KEY: ${{ secrets.LLM_API_KEY }}
-      EMBEDDING_API_VERSION: "2024-02-01"
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
@ -108,14 +107,14 @@ jobs:
    env:
      LLM_PROVIDER: openai
      LLM_MODEL: gpt-5-mini
-      LLM_ENDPOINT: https://api.openai.com/v1/
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_ENDPOINT: https://api.openai.com/v1
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY  }}
      LLM_API_VERSION: "2024-02-01"
-      EMBEDDING_PROVIDER: openai
-      EMBEDDING_MODEL: text-embedding-3-small
-      EMBEDDING_ENDPOINT: https://api.openai.com/v1/
-      EMBEDDING_API_KEY: ${{ secrets.LLM_API_KEY }}
-      EMBEDDING_API_VERSION: "2024-02-01"
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
--- a/alembic/env.py
+++ b/alembic/env.py
@ -87,11 +87,6 @@ db_engine = get_relational_engine()

 print("Using database:", db_engine.db_uri)

-if "sqlite" in db_engine.db_uri:
-    from cognee.infrastructure.utils.run_sync import run_sync
-
-    run_sync(db_engine.create_database())
-
 config.set_section_option(
    config.config_ini_section,
    "SQLALCHEMY_DATABASE_URI",
--- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py
+++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py
@ -10,6 +10,7 @@ from typing import Sequence, Union

 from alembic import op
 import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql


 # revision identifiers, used by Alembic.
@ -26,7 +27,34 @@ def upgrade() -> None:
    connection = op.get_bind()
    inspector = sa.inspect(connection)

+    if op.get_context().dialect.name == "postgresql":
+        syncstatus_enum = postgresql.ENUM(
+            "STARTED", "IN_PROGRESS", "COMPLETED", "FAILED", "CANCELLED", name="syncstatus"
+        )
+        syncstatus_enum.create(op.get_bind(), checkfirst=True)
+
    if "sync_operations" not in inspector.get_table_names():
+        if op.get_context().dialect.name == "postgresql":
+            syncstatus = postgresql.ENUM(
+                "STARTED",
+                "IN_PROGRESS",
+                "COMPLETED",
+                "FAILED",
+                "CANCELLED",
+                name="syncstatus",
+                create_type=False,
+            )
+        else:
+            syncstatus = sa.Enum(
+                "STARTED",
+                "IN_PROGRESS",
+                "COMPLETED",
+                "FAILED",
+                "CANCELLED",
+                name="syncstatus",
+                create_type=False,
+            )
+
        # Table doesn't exist, create it normally
        op.create_table(
            "sync_operations",
@ -34,15 +62,7 @@ def upgrade() -> None:
            sa.Column("run_id", sa.Text(), nullable=True),
            sa.Column(
                "status",
-                sa.Enum(
-                    "STARTED",
-                    "IN_PROGRESS",
-                    "COMPLETED",
-                    "FAILED",
-                    "CANCELLED",
-                    name="syncstatus",
-                    create_type=False,
-                ),
+                syncstatus,
                nullable=True,
            ),
            sa.Column("progress_percentage", sa.Integer(), nullable=True),
--- a/alembic/versions/482cd6517ce4_add_default_user.py
+++ b/alembic/versions/482cd6517ce4_add_default_user.py
@ -23,11 +23,8 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"


 def upgrade() -> None:
-    try:
-        await_only(create_default_user())
-    except UserAlreadyExists:
-        pass  # It's fine if the default user already exists
+    pass


 def downgrade() -> None:
-    await_only(delete_user("default_user@example.com"))
+    pass
--- a/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py
+++ b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py
@ -0,0 +1,98 @@
+"""Expand dataset database for multi user
+
+Revision ID: 76625596c5c3
+Revises: 211ab850ef3d
+Create Date: 2025-10-30 12:55:20.239562
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "76625596c5c3"
+down_revision: Union[str, None] = "c946955da633"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def _get_column(inspector, table, name, schema=None):
+    for col in inspector.get_columns(table, schema=schema):
+        if col["name"] == name:
+            return col
+    return None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    insp = sa.inspect(conn)
+
+    vector_database_provider_column = _get_column(
+        insp, "dataset_database", "vector_database_provider"
+    )
+    if not vector_database_provider_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column(
+                "vector_database_provider",
+                sa.String(),
+                unique=False,
+                nullable=False,
+                server_default="lancedb",
+            ),
+        )
+
+    graph_database_provider_column = _get_column(
+        insp, "dataset_database", "graph_database_provider"
+    )
+    if not graph_database_provider_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column(
+                "graph_database_provider",
+                sa.String(),
+                unique=False,
+                nullable=False,
+                server_default="kuzu",
+            ),
+        )
+
+    vector_database_url_column = _get_column(insp, "dataset_database", "vector_database_url")
+    if not vector_database_url_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column("vector_database_url", sa.String(), unique=False, nullable=True),
+        )
+
+    graph_database_url_column = _get_column(insp, "dataset_database", "graph_database_url")
+    if not graph_database_url_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column("graph_database_url", sa.String(), unique=False, nullable=True),
+        )
+
+    vector_database_key_column = _get_column(insp, "dataset_database", "vector_database_key")
+    if not vector_database_key_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column("vector_database_key", sa.String(), unique=False, nullable=True),
+        )
+
+    graph_database_key_column = _get_column(insp, "dataset_database", "graph_database_key")
+    if not graph_database_key_column:
+        op.add_column(
+            "dataset_database",
+            sa.Column("graph_database_key", sa.String(), unique=False, nullable=True),
+        )
+
+
+def downgrade() -> None:
+    op.drop_column("dataset_database", "vector_database_provider")
+    op.drop_column("dataset_database", "graph_database_provider")
+    op.drop_column("dataset_database", "vector_database_url")
+    op.drop_column("dataset_database", "graph_database_url")
+    op.drop_column("dataset_database", "vector_database_key")
+    op.drop_column("dataset_database", "graph_database_key")
--- a/alembic/versions/8057ae7329c2_initial_migration.py
+++ b/alembic/versions/8057ae7329c2_initial_migration.py
@ -18,11 +18,8 @@ depends_on: Union[str, Sequence[str], None] = None


 def upgrade() -> None:
-    db_engine = get_relational_engine()
-    # we might want to delete this
-    await_only(db_engine.create_database())
+    pass


 def downgrade() -> None:
-    db_engine = get_relational_engine()
-    await_only(db_engine.delete_database())
+    pass
--- a/alembic/versions/ab7e313804ae_permission_system_rework.py
+++ b/alembic/versions/ab7e313804ae_permission_system_rework.py
@ -144,44 +144,58 @@ def _create_data_permission(conn, user_id, data_id, permission_name):
    )


+def _get_column(inspector, table, name, schema=None):
+    for col in inspector.get_columns(table, schema=schema):
+        if col["name"] == name:
+            return col
+    return None
+
+
 def upgrade() -> None:
    conn = op.get_bind()
+    insp = sa.inspect(conn)

-    # Recreate ACLs table with default permissions set to datasets instead of documents
-    op.drop_table("acls")
+    dataset_id_column = _get_column(insp, "acls", "dataset_id")
+    if not dataset_id_column:
+        # Recreate ACLs table with default permissions set to datasets instead of documents
+        op.drop_table("acls")

-    acls_table = op.create_table(
-        "acls",
-        sa.Column("id", UUID, primary_key=True, default=uuid4),
-        sa.Column(
-            "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
-        ),
-        sa.Column(
-            "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)
-        ),
-        sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
-        sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
-        sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")),
-    )
+        acls_table = op.create_table(
+            "acls",
+            sa.Column("id", UUID, primary_key=True, default=uuid4),
+            sa.Column(
+                "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+            ),
+            sa.Column(
+                "updated_at",
+                sa.DateTime(timezone=True),
+                onupdate=lambda: datetime.now(timezone.utc),
+            ),
+            sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
+            sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
+            sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")),
+        )

-    # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
-    #       definition or load what is in the database
-    dataset_table = _define_dataset_table()
-    datasets = conn.execute(sa.select(dataset_table)).fetchall()
+        # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
+        #       definition or load what is in the database
+        dataset_table = _define_dataset_table()
+        datasets = conn.execute(sa.select(dataset_table)).fetchall()

-    if not datasets:
-        return
+        if not datasets:
+            return

-    acl_list = []
+        acl_list = []

-    for dataset in datasets:
-        acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read"))
-        acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write"))
-        acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share"))
-        acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete"))
+        for dataset in datasets:
+            acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read"))
+            acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write"))
+            acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share"))
+            acl_list.append(
+                _create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")
+            )

-    if acl_list:
-        op.bulk_insert(acls_table, acl_list)
+        if acl_list:
+            op.bulk_insert(acls_table, acl_list)


 def downgrade() -> None:
--- a/alembic/versions/c946955da633_multi_tenant_support.py
+++ b/alembic/versions/c946955da633_multi_tenant_support.py
@ -0,0 +1,137 @@
+"""Multi Tenant Support
+
+Revision ID: c946955da633
+Revises: 211ab850ef3d
+Create Date: 2025-11-04 18:11:09.325158
+
+"""
+
+from typing import Sequence, Union
+from datetime import datetime, timezone
+from uuid import uuid4
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision: str = "c946955da633"
+down_revision: Union[str, None] = "211ab850ef3d"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def _now():
+    return datetime.now(timezone.utc)
+
+
+def _define_user_table() -> sa.Table:
+    table = sa.Table(
+        "users",
+        sa.MetaData(),
+        sa.Column(
+            "id",
+            sa.UUID,
+            sa.ForeignKey("principals.id", ondelete="CASCADE"),
+            primary_key=True,
+            nullable=False,
+        ),
+        sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), index=True, nullable=True),
+    )
+    return table
+
+
+def _define_dataset_table() -> sa.Table:
+    # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
+    #       definition or load what is in the database
+    table = sa.Table(
+        "datasets",
+        sa.MetaData(),
+        sa.Column("id", sa.UUID, primary_key=True, default=uuid4),
+        sa.Column("name", sa.Text),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            default=lambda: datetime.now(timezone.utc),
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            onupdate=lambda: datetime.now(timezone.utc),
+        ),
+        sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True),
+        sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), index=True, nullable=True),
+    )
+
+    return table
+
+
+def _get_column(inspector, table, name, schema=None):
+    for col in inspector.get_columns(table, schema=schema):
+        if col["name"] == name:
+            return col
+    return None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    insp = sa.inspect(conn)
+
+    dataset = _define_dataset_table()
+    user = _define_user_table()
+
+    if "user_tenants" not in insp.get_table_names():
+        # Define table with all necessary columns including primary key
+        user_tenants = op.create_table(
+            "user_tenants",
+            sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True),
+            sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True),
+            sa.Column(
+                "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+            ),
+        )
+
+        # Get all users with their tenant_id
+        user_data = conn.execute(
+            sa.select(user.c.id, user.c.tenant_id).where(user.c.tenant_id.isnot(None))
+        ).fetchall()
+
+        # Insert into user_tenants table
+        if user_data:
+            op.bulk_insert(
+                user_tenants,
+                [
+                    {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()}
+                    for user_id, tenant_id in user_data
+                ],
+            )
+
+    tenant_id_column = _get_column(insp, "datasets", "tenant_id")
+    if not tenant_id_column:
+        op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True))
+
+        # Build subquery, select users.tenant_id for each dataset.owner_id
+        tenant_id_from_dataset_owner = (
+            sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery()
+        )
+
+        if op.get_context().dialect.name == "sqlite":
+            # If column doesn't exist create new original_extension column and update from values of extension column
+            with op.batch_alter_table("datasets") as batch_op:
+                batch_op.execute(
+                    dataset.update().values(
+                        tenant_id=tenant_id_from_dataset_owner,
+                    )
+                )
+        else:
+            conn = op.get_bind()
+            conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner))
+
+        op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"])
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("user_tenants")
+    op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets")
+    op.drop_column("datasets", "tenant_id")
+    # ### end Alembic commands ###
--- a/cognee-mcp/src/server.py
+++ b/cognee-mcp/src/server.py
@ -194,7 +194,6 @@ async def cognify(

    Prerequisites:
        - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
-        - **Data Added**: Must have data previously added via `cognee.add()`
        - **Vector Database**: Must be accessible for embeddings storage
        - **Graph Database**: Must be accessible for relationship storage

@ -1096,6 +1095,10 @@ async def main():

    # Skip migrations when in API mode (the API server handles its own database)
    if not args.no_migration and not args.api_url:
+        from cognee.modules.engine.operations.setup import setup
+
+        await setup()
+
        # Run Alembic migrations from the main cognee directory where alembic.ini is located
        logger.info("Running database migrations...")
        migration_result = subprocess.run(
--- a/cognee/api/client.py
+++ b/cognee/api/client.py
@ -23,6 +23,7 @@ from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
@ -263,6 +264,8 @@ app.include_router(

 app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"])

+app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"])
+
 app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"])

 app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
--- a/cognee/api/v1/add/routers/get_add_router.py
+++ b/cognee/api/v1/add/routers/get_add_router.py
@ -82,7 +82,9 @@ def get_add_router() -> APIRouter:
                datasetName,
                user=user,
                dataset_id=datasetId,
-                node_set=node_set if node_set else None,
+                node_set=node_set
+                if node_set != [""]
+                else None,  # Transform default node_set endpoint value to None
            )

            if isinstance(add_run, PipelineRunErrored):
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@ -41,6 +41,9 @@ class CognifyPayloadDTO(InDTO):
    custom_prompt: Optional[str] = Field(
        default="", description="Custom prompt for entity extraction and graph generation"
    )
+    ontology_key: Optional[List[str]] = Field(
+        default=None, description="Reference to one or more previously uploaded ontologies"
+    )


 def get_cognify_router() -> APIRouter:
@ -68,6 +71,7 @@ def get_cognify_router() -> APIRouter:
        - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
        - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
        - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
+        - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.

        ## Response
        - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@ -82,7 +86,8 @@ def get_cognify_router() -> APIRouter:
        {
            "datasets": ["research_papers", "documentation"],
            "run_in_background": false,
-            "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
+            "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
+            "ontology_key": ["medical_ontology_v1"]
        }
        ```

@ -108,13 +113,35 @@ def get_cognify_router() -> APIRouter:
            )

        from cognee.api.v1.cognify import cognify as cognee_cognify
+        from cognee.api.v1.ontologies.ontologies import OntologyService

        try:
            datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
+            config_to_use = None
+
+            if payload.ontology_key:
+                ontology_service = OntologyService()
+                ontology_contents = ontology_service.get_ontology_contents(
+                    payload.ontology_key, user
+                )
+
+                from cognee.modules.ontology.ontology_config import Config
+                from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
+                    RDFLibOntologyResolver,
+                )
+                from io import StringIO
+
+                ontology_streams = [StringIO(content) for content in ontology_contents]
+                config_to_use: Config = {
+                    "ontology_config": {
+                        "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
+                    }
+                }

            cognify_run = await cognee_cognify(
                datasets,
                user,
+                config=config_to_use,
                run_in_background=payload.run_in_background,
                custom_prompt=payload.custom_prompt,
            )
--- a/cognee/api/v1/ontologies/init.py
+++ b/cognee/api/v1/ontologies/init.py
@ -0,0 +1,4 @@
+from .ontologies import OntologyService
+from .routers.get_ontology_router import get_ontology_router
+
+__all__ = ["OntologyService", "get_ontology_router"]
--- a/cognee/api/v1/ontologies/ontologies.py
+++ b/cognee/api/v1/ontologies/ontologies.py
@ -0,0 +1,183 @@
+import os
+import json
+import tempfile
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional, List
+from dataclasses import dataclass
+
+
+@dataclass
+class OntologyMetadata:
+    ontology_key: str
+    filename: str
+    size_bytes: int
+    uploaded_at: str
+    description: Optional[str] = None
+
+
+class OntologyService:
+    def __init__(self):
+        pass
+
+    @property
+    def base_dir(self) -> Path:
+        return Path(tempfile.gettempdir()) / "ontologies"
+
+    def _get_user_dir(self, user_id: str) -> Path:
+        user_dir = self.base_dir / str(user_id)
+        user_dir.mkdir(parents=True, exist_ok=True)
+        return user_dir
+
+    def _get_metadata_path(self, user_dir: Path) -> Path:
+        return user_dir / "metadata.json"
+
+    def _load_metadata(self, user_dir: Path) -> dict:
+        metadata_path = self._get_metadata_path(user_dir)
+        if metadata_path.exists():
+            with open(metadata_path, "r") as f:
+                return json.load(f)
+        return {}
+
+    def _save_metadata(self, user_dir: Path, metadata: dict):
+        metadata_path = self._get_metadata_path(user_dir)
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+
+    async def upload_ontology(
+        self, ontology_key: str, file, user, description: Optional[str] = None
+    ) -> OntologyMetadata:
+        if not file.filename.lower().endswith(".owl"):
+            raise ValueError("File must be in .owl format")
+
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        if ontology_key in metadata:
+            raise ValueError(f"Ontology key '{ontology_key}' already exists")
+
+        content = await file.read()
+        if len(content) > 10 * 1024 * 1024:
+            raise ValueError("File size exceeds 10MB limit")
+
+        file_path = user_dir / f"{ontology_key}.owl"
+        with open(file_path, "wb") as f:
+            f.write(content)
+
+        ontology_metadata = {
+            "filename": file.filename,
+            "size_bytes": len(content),
+            "uploaded_at": datetime.now(timezone.utc).isoformat(),
+            "description": description,
+        }
+        metadata[ontology_key] = ontology_metadata
+        self._save_metadata(user_dir, metadata)
+
+        return OntologyMetadata(
+            ontology_key=ontology_key,
+            filename=file.filename,
+            size_bytes=len(content),
+            uploaded_at=ontology_metadata["uploaded_at"],
+            description=description,
+        )
+
+    async def upload_ontologies(
+        self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
+    ) -> List[OntologyMetadata]:
+        """
+        Upload ontology files with their respective keys.
+
+        Args:
+            ontology_key: List of unique keys for each ontology
+            files: List of UploadFile objects (same length as keys)
+            user: Authenticated user
+            descriptions: Optional list of descriptions for each file
+
+        Returns:
+            List of OntologyMetadata objects for uploaded files
+
+        Raises:
+            ValueError: If keys duplicate, file format invalid, or array lengths don't match
+        """
+        if len(ontology_key) != len(files):
+            raise ValueError("Number of keys must match number of files")
+
+        if len(set(ontology_key)) != len(ontology_key):
+            raise ValueError("Duplicate ontology keys not allowed")
+
+        if descriptions and len(descriptions) != len(files):
+            raise ValueError("Number of descriptions must match number of files")
+
+        results = []
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        for i, (key, file) in enumerate(zip(ontology_key, files)):
+            if key in metadata:
+                raise ValueError(f"Ontology key '{key}' already exists")
+
+            if not file.filename.lower().endswith(".owl"):
+                raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+            content = await file.read()
+            if len(content) > 10 * 1024 * 1024:
+                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+            file_path = user_dir / f"{key}.owl"
+            with open(file_path, "wb") as f:
+                f.write(content)
+
+            ontology_metadata = {
+                "filename": file.filename,
+                "size_bytes": len(content),
+                "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                "description": descriptions[i] if descriptions else None,
+            }
+            metadata[key] = ontology_metadata
+
+            results.append(
+                OntologyMetadata(
+                    ontology_key=key,
+                    filename=file.filename,
+                    size_bytes=len(content),
+                    uploaded_at=ontology_metadata["uploaded_at"],
+                    description=descriptions[i] if descriptions else None,
+                )
+            )
+
+        self._save_metadata(user_dir, metadata)
+        return results
+
+    def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
+        """
+        Retrieve ontology content for one or more keys.
+
+        Args:
+            ontology_key: List of ontology keys to retrieve (can contain single item)
+            user: Authenticated user
+
+        Returns:
+            List of ontology content strings
+
+        Raises:
+            ValueError: If any ontology key not found
+        """
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        contents = []
+        for key in ontology_key:
+            if key not in metadata:
+                raise ValueError(f"Ontology key '{key}' not found")
+
+            file_path = user_dir / f"{key}.owl"
+            if not file_path.exists():
+                raise ValueError(f"Ontology file for key '{key}' not found")
+
+            with open(file_path, "r", encoding="utf-8") as f:
+                contents.append(f.read())
+        return contents
+
+    def list_ontologies(self, user) -> dict:
+        user_dir = self._get_user_dir(str(user.id))
+        return self._load_metadata(user_dir)
--- a/cognee/api/v1/ontologies/routers/init.py
+++ b/cognee/api/v1/ontologies/routers/init.py
--- a/cognee/api/v1/ontologies/routers/get_ontology_router.py
+++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py
@ -0,0 +1,107 @@
+from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
+from fastapi.responses import JSONResponse
+from typing import Optional, List
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version
+from ..ontologies import OntologyService
+
+
+def get_ontology_router() -> APIRouter:
+    router = APIRouter()
+    ontology_service = OntologyService()
+
+    @router.post("", response_model=dict)
+    async def upload_ontology(
+        ontology_key: str = Form(...),
+        ontology_file: List[UploadFile] = File(...),
+        descriptions: Optional[str] = Form(None),
+        user: User = Depends(get_authenticated_user),
+    ):
+        """
+        Upload ontology files with their respective keys for later use in cognify operations.
+
+        Supports both single and multiple file uploads:
+        - Single file: ontology_key=["key"], ontology_file=[file]
+        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+
+        ## Request Parameters
+        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+        - **ontology_file** (List[UploadFile]): OWL format ontology files
+        - **descriptions** (Optional[str]): JSON array string of optional descriptions
+
+        ## Response
+        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
+
+        ## Error Codes
+        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        send_telemetry(
+            "Ontology Upload API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "POST /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            import json
+
+            ontology_keys = json.loads(ontology_key)
+            description_list = json.loads(descriptions) if descriptions else None
+
+            if not isinstance(ontology_keys, list):
+                raise ValueError("ontology_key must be a JSON array")
+
+            results = await ontology_service.upload_ontologies(
+                ontology_keys, ontology_file, user, description_list
+            )
+
+            return {
+                "uploaded_ontologies": [
+                    {
+                        "ontology_key": result.ontology_key,
+                        "filename": result.filename,
+                        "size_bytes": result.size_bytes,
+                        "uploaded_at": result.uploaded_at,
+                        "description": result.description,
+                    }
+                    for result in results
+                ]
+            }
+        except (json.JSONDecodeError, ValueError) as e:
+            return JSONResponse(status_code=400, content={"error": str(e)})
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    @router.get("", response_model=dict)
+    async def list_ontologies(user: User = Depends(get_authenticated_user)):
+        """
+        List all uploaded ontologies for the authenticated user.
+
+        ## Response
+        Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp.
+
+        ## Error Codes
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        send_telemetry(
+            "Ontology List API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "GET /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            metadata = ontology_service.list_ontologies(user)
+            return metadata
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    return router
--- a/cognee/api/v1/permissions/routers/get_permissions_router.py
+++ b/cognee/api/v1/permissions/routers/get_permissions_router.py
@ -1,15 +1,20 @@
 from uuid import UUID
-from typing import List
+from typing import List, Union

 from fastapi import APIRouter, Depends
 from fastapi.responses import JSONResponse

 from cognee.modules.users.models import User
+from cognee.api.DTO import InDTO
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee import __version__ as cognee_version


+class SelectTenantDTO(InDTO):
+    tenant_id: UUID | None = None
+
+
 def get_permissions_router() -> APIRouter:
    permissions_router = APIRouter()

@ -226,4 +231,39 @@ def get_permissions_router() -> APIRouter:
            status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)}
        )

+    @permissions_router.post("/tenants/select")
+    async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)):
+        """
+        Select current tenant.
+
+        This endpoint selects a tenant with the specified UUID. Tenants are used
+        to organize users and resources in multi-tenant environments, providing
+        isolation and access control between different groups or organizations.
+
+        Sending a null/None value as tenant_id selects his default single user tenant
+
+        ## Request Parameters
+        - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant
+
+        ## Response
+        Returns a success message along with selected tenant id.
+        """
+        send_telemetry(
+            "Permissions API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}",
+                "tenant_id": str(payload.tenant_id),
+            },
+        )
+
+        from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method
+
+        await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id)
+
+        return JSONResponse(
+            status_code=200,
+            content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)},
+        )
+
    return permissions_router
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@ -31,6 +31,8 @@ async def search(
    only_context: bool = False,
    use_combined_context: bool = False,
    session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
    """
    Search and query the knowledge graph for insights, information, and connections.
@ -200,6 +202,8 @@ async def search(
        only_context=only_context,
        use_combined_context=use_combined_context,
        session_id=session_id,
+        wide_search_top_k=wide_search_top_k,
+        triplet_distance_penalty=triplet_distance_penalty,
    )

    return filtered_search_results
--- a/cognee/cli/commands/cognify_command.py
+++ b/cognee/cli/commands/cognify_command.py
@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasonin

 Processing Pipeline:
 1. **Document Classification**: Identifies document types and structures
-2. **Permission Validation**: Ensures user has processing rights  
+2. **Permission Validation**: Ensures user has processing rights
 3. **Text Chunking**: Breaks content into semantically meaningful segments
 4. **Entity Extraction**: Identifies key concepts, people, places, organizations
 5. **Relationship Detection**: Discovers connections between entities
@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge
                            chunker_class = LangchainChunker
                        except ImportError:
                            fmt.warning("LangchainChunker not available, using TextChunker")
+                    elif args.chunker == "CsvChunker":
+                        try:
+                            from cognee.modules.chunking.CsvChunker import CsvChunker
+
+                            chunker_class = CsvChunker
+                        except ImportError:
+                            fmt.warning("CsvChunker not available, using TextChunker")

                    result = await cognee.cognify(
                        datasets=datasets,
--- a/cognee/cli/config.py
+++ b/cognee/cli/config.py
@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [
 ]

 # Chunker choices
-CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"]
+CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"]

 # Output format choices
 OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"]
--- a/cognee/context_global_variables.py
+++ b/cognee/context_global_variables.py
@ -16,8 +16,8 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)

-vector_dbs_with_multi_user_support = ["lancedb"]
-graph_dbs_with_multi_user_support = ["kuzu"]
+VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]


 async def set_session_user_context_variable(user):
@ -28,8 +28,8 @@ def multi_user_support_possible():
    graph_db_config = get_graph_context_config()
    vector_db_config = get_vectordb_context_config()
    return (
-        graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support
-        and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support
+        graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+        and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
    )


@ -69,8 +69,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_

    """

-    base_config = get_base_config()
-
    if not backend_access_control_enabled():
        return

@ -79,6 +77,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
    # To ensure permissions are enforced properly all datasets will have their own databases
    dataset_database = await get_or_create_dataset_database(dataset, user)

+    base_config = get_base_config()
    data_root_directory = os.path.join(
        base_config.data_root_directory, str(user.tenant_id or user.id)
    )
@ -88,15 +87,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_

    # Set vector and graph database configuration based on dataset database information
    vector_config = {
-        "vector_db_url": os.path.join(
-            databases_directory_path, dataset_database.vector_database_name
-        ),
-        "vector_db_key": "",
-        "vector_db_provider": "lancedb",
+        "vector_db_provider": dataset_database.vector_database_provider,
+        "vector_db_url": dataset_database.vector_database_url,
+        "vector_db_key": dataset_database.vector_database_key,
+        "vector_db_name": dataset_database.vector_database_name,
    }

    graph_config = {
-        "graph_database_provider": "kuzu",
+        "graph_database_provider": dataset_database.graph_database_provider,
+        "graph_database_url": dataset_database.graph_database_url,
+        "graph_database_name": dataset_database.graph_database_name,
+        "graph_database_key": dataset_database.graph_database_key,
        "graph_file_path": os.path.join(
            databases_directory_path, dataset_database.graph_database_name
        ),
--- a/cognee/eval_framework/Dockerfile
+++ b/cognee/eval_framework/Dockerfile
@ -0,0 +1,29 @@
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PIP_NO_CACHE_DIR=true
+ENV PATH="${PATH}:/root/.poetry/bin"
+ENV PYTHONPATH=/app
+ENV SKIP_MIGRATIONS=true
+
+# System dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    libpq-dev \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml poetry.lock README.md /app/
+
+RUN pip install poetry
+
+RUN poetry config virtualenvs.create false
+
+RUN poetry install --extras distributed --extras evals --extras deepeval --no-root
+
+COPY cognee/ /app/cognee
+COPY distributed/ /app/distributed
--- a/cognee/eval_framework/answer_generation/answer_generation_executor.py
+++ b/cognee/eval_framework/answer_generation/answer_generation_executor.py
@ -35,6 +35,16 @@ class AnswerGeneratorExecutor:
            retrieval_context = await retriever.get_context(query_text)
            search_results = await retriever.get_completion(query_text, retrieval_context)

+            ############
+            #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
+            if isinstance(retrieval_context, list):
+                retrieval_context = await retriever.convert_retrieved_objects_to_context(
+                    triplets=retrieval_context
+                )
+
+            if isinstance(search_results, str):
+                search_results = [search_results]
+            #############
            answer = {
                "question": query_text,
                "answer": search_results[0],
--- a/cognee/eval_framework/answer_generation/run_question_answering_module.py
+++ b/cognee/eval_framework/answer_generation/run_question_answering_module.py
@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):


 async def run_question_answering(
-    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
+    params: dict, system_prompt="answer_simple_question_benchmark.txt", top_k: Optional[int] = None
 ) -> List[dict]:
    if params.get("answering_questions"):
        logger.info("Question answering started...")
--- a/cognee/eval_framework/eval_config.py
+++ b/cognee/eval_framework/eval_config.py
@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):

    # Question answering params
    answering_questions: bool = True
-    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
+    qa_engine: str = "cognee_graph_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'

    # Evaluation params
    evaluating_answers: bool = True
@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
        "EM",
        "f1",
    ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-5-mini"
+    deepeval_model: str = "gpt-4o-mini"

    # Metrics params
    calculate_metrics: bool = True
--- a/cognee/eval_framework/modal_run_eval.py
+++ b/cognee/eval_framework/modal_run_eval.py
@ -2,7 +2,6 @@ import modal
 import os
 import asyncio
 import datetime
-import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@ -10,6 +9,9 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
    run_question_answering,
 )
+import pathlib
+from os import path
+from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard

@ -38,22 +40,19 @@ def read_and_combine_metrics(eval_params: dict) -> dict:

 app = modal.App("modal-run-eval")

-image = (
-    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
-    .copy_local_file("pyproject.toml", "pyproject.toml")
-    .copy_local_file("poetry.lock", "poetry.lock")
-    .env(
-        {
-            "ENV": os.getenv("ENV"),
-            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
-            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
-        }
-    )
-    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
+image = Image.from_dockerfile(
+    path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
+    force_build=False,
+).add_local_python_source("cognee")
+
+
+@app.function(
+    image=image,
+    max_containers=10,
+    timeout=86400,
+    volumes={"/data": vol},
+    secrets=[modal.Secret.from_name("eval_secrets")],
 )
-
-
-@app.function(image=image, concurrency_limit=10, timeout=86400, volumes={"/data": vol})
 async def modal_run_eval(eval_params=None):
    """Runs evaluation pipeline and returns combined metrics results."""
    if eval_params is None:
@ -105,18 +104,7 @@ async def main():
    configs = [
        EvalConfig(
            task_getter_type="Default",
-            number_of_samples_in_corpus=10,
-            benchmark="HotPotQA",
-            qa_engine="cognee_graph_completion",
-            building_corpus_from_scratch=True,
-            answering_questions=True,
-            evaluating_answers=True,
-            calculate_metrics=True,
-            dashboard=True,
-        ),
-        EvalConfig(
-            task_getter_type="Default",
-            number_of_samples_in_corpus=10,
+            number_of_samples_in_corpus=25,
            benchmark="TwoWikiMultiHop",
            qa_engine="cognee_graph_completion",
            building_corpus_from_scratch=True,
@ -127,7 +115,7 @@ async def main():
        ),
        EvalConfig(
            task_getter_type="Default",
-            number_of_samples_in_corpus=10,
+            number_of_samples_in_corpus=25,
            benchmark="Musique",
            qa_engine="cognee_graph_completion",
            building_corpus_from_scratch=True,
--- a/cognee/infrastructure/databases/cache/config.py
+++ b/cognee/infrastructure/databases/cache/config.py
@ -1,6 +1,6 @@
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from functools import lru_cache
-from typing import Optional
+from typing import Optional, Literal


 class CacheConfig(BaseSettings):
@ -15,6 +15,7 @@ class CacheConfig(BaseSettings):
    - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
    """

+    cache_backend: Literal["redis", "fs"] = "fs"
    caching: bool = False
    shared_kuzu_lock: bool = False
    cache_host: str = "localhost"
@ -28,6 +29,7 @@ class CacheConfig(BaseSettings):

    def to_dict(self) -> dict:
        return {
+            "cache_backend": self.cache_backend,
            "caching": self.caching,
            "shared_kuzu_lock": self.shared_kuzu_lock,
            "cache_host": self.cache_host,
--- a/cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py
+++ b/cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py
@ -0,0 +1,151 @@
+import asyncio
+import json
+import os
+from datetime import datetime
+import time
+import threading
+import diskcache as dc
+
+from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.exceptions.exceptions import (
+    CacheConnectionError,
+    SharedKuzuLockRequiresRedisError,
+)
+from cognee.infrastructure.files.storage.get_storage_config import get_storage_config
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger("FSCacheAdapter")
+
+
+class FSCacheAdapter(CacheDBInterface):
+    def __init__(self):
+        default_key = "sessions_db"
+
+        storage_config = get_storage_config()
+        data_root_directory = storage_config["data_root_directory"]
+        cache_directory = os.path.join(data_root_directory, ".cognee_fs_cache", default_key)
+        os.makedirs(cache_directory, exist_ok=True)
+        self.cache = dc.Cache(directory=cache_directory)
+        self.cache.expire()
+
+        logger.debug(f"FSCacheAdapter initialized with cache directory: {cache_directory}")
+
+    def acquire_lock(self):
+        """Lock acquisition is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    def release_lock(self):
+        """Lock release is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    async def add_qa(
+        self,
+        user_id: str,
+        session_id: str,
+        question: str,
+        context: str,
+        answer: str,
+        ttl: int | None = 86400,
+    ):
+        try:
+            session_key = f"agent_sessions:{user_id}:{session_id}"
+
+            qa_entry = {
+                "time": datetime.utcnow().isoformat(),
+                "question": question,
+                "context": context,
+                "answer": answer,
+            }
+
+            existing_value = self.cache.get(session_key)
+            if existing_value is not None:
+                value: list = json.loads(existing_value)
+                value.append(qa_entry)
+            else:
+                value = [qa_entry]
+
+            self.cache.set(session_key, json.dumps(value), expire=ttl)
+        except Exception as e:
+            error_msg = f"Unexpected error while adding Q&A to diskcache: {str(e)}"
+            logger.error(error_msg)
+            raise CacheConnectionError(error_msg) from e
+
+    async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        entries = json.loads(value)
+        return entries[-last_n:] if len(entries) > last_n else entries
+
+    async def get_all_qas(self, user_id: str, session_id: str):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        return json.loads(value)
+
+    async def close(self):
+        if self.cache is not None:
+            self.cache.expire()
+            self.cache.close()
+
+
+async def main():
+    adapter = FSCacheAdapter()
+    session_id = "demo_session"
+    user_id = "demo_user_id"
+
+    print("\nAdding sample Q/A pairs...")
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Basic DB context",
+        "Redis is an in-memory data store.",
+    )
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "Historical context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print("\nLatest QA:")
+    latest = await adapter.get_latest_qa(user_id, session_id)
+    print(json.dumps(latest, indent=2))
+
+    print("\nLast 2 QAs:")
+    last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2)
+    print(json.dumps(last_two, indent=2))
+
+    session_id = "session_expire_demo"
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Database context",
+        "Redis is an in-memory data store.",
+    )
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "History context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print(await adapter.get_all_qas(user_id, session_id))
+
+    await adapter.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/cognee/infrastructure/databases/cache/get_cache_engine.py
+++ b/cognee/infrastructure/databases/cache/get_cache_engine.py
@ -1,9 +1,11 @@
 """Factory to get the appropriate cache coordination engine (e.g., Redis)."""

 from functools import lru_cache
+import os
 from typing import Optional
 from cognee.infrastructure.databases.cache.config import get_cache_config
 from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter

 config = get_cache_config()

@ -33,20 +35,28 @@ def create_cache_engine(

    Returns:
    --------
-    - CacheDBInterface: An instance of the appropriate cache adapter. :TODO: Now we support only Redis. later if we add more here we can split the logic
+    - CacheDBInterface: An instance of the appropriate cache adapter.
    """
    if config.caching:
        from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

-        return RedisAdapter(
-            host=cache_host,
-            port=cache_port,
-            username=cache_username,
-            password=cache_password,
-            lock_name=lock_key,
-            timeout=agentic_lock_expire,
-            blocking_timeout=agentic_lock_timeout,
-        )
+        if config.cache_backend == "redis":
+            return RedisAdapter(
+                host=cache_host,
+                port=cache_port,
+                username=cache_username,
+                password=cache_password,
+                lock_name=lock_key,
+                timeout=agentic_lock_expire,
+                blocking_timeout=agentic_lock_timeout,
+            )
+        elif config.cache_backend == "fs":
+            return FSCacheAdapter()
+        else:
+            raise ValueError(
+                f"Unsupported cache backend: '{config.cache_backend}'. "
+                f"Supported backends are: 'redis', 'fs'"
+            )
    else:
        return None

--- a/cognee/infrastructure/databases/exceptions/exceptions.py
+++ b/cognee/infrastructure/databases/exceptions/exceptions.py
@ -148,3 +148,19 @@ class CacheConnectionError(CogneeConfigurationError):
        status_code: int = status.HTTP_503_SERVICE_UNAVAILABLE,
    ):
        super().__init__(message, name, status_code)
+
+
+class SharedKuzuLockRequiresRedisError(CogneeConfigurationError):
+    """
+    Raised when shared Kuzu locking is requested without configuring the Redis backend.
+    """
+
+    def __init__(
+        self,
+        message: str = (
+            "Shared Kuzu lock requires Redis cache backend. Configure Redis to enable shared Kuzu locking."
+        ),
+        name: str = "SharedKuzuLockRequiresRedisError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        super().__init__(message, name, status_code)
--- a/cognee/infrastructure/databases/graph/config.py
+++ b/cognee/infrastructure/databases/graph/config.py
@ -26,6 +26,7 @@ class GraphConfig(BaseSettings):
    - graph_database_username
    - graph_database_password
    - graph_database_port
+    - graph_database_key
    - graph_file_path
    - graph_model
    - graph_topology
@ -41,6 +42,7 @@ class GraphConfig(BaseSettings):
    graph_database_username: str = ""
    graph_database_password: str = ""
    graph_database_port: int = 123
+    graph_database_key: str = ""
    graph_file_path: str = ""
    graph_filename: str = ""
    graph_model: object = KnowledgeGraph
@ -90,6 +92,7 @@ class GraphConfig(BaseSettings):
            "graph_database_username": self.graph_database_username,
            "graph_database_password": self.graph_database_password,
            "graph_database_port": self.graph_database_port,
+            "graph_database_key": self.graph_database_key,
            "graph_file_path": self.graph_file_path,
            "graph_model": self.graph_model,
            "graph_topology": self.graph_topology,
@ -116,6 +119,7 @@ class GraphConfig(BaseSettings):
            "graph_database_username": self.graph_database_username,
            "graph_database_password": self.graph_database_password,
            "graph_database_port": self.graph_database_port,
+            "graph_database_key": self.graph_database_key,
            "graph_file_path": self.graph_file_path,
        }

--- a/cognee/infrastructure/databases/graph/get_graph_engine.py
+++ b/cognee/infrastructure/databases/graph/get_graph_engine.py
@ -33,6 +33,7 @@ def create_graph_engine(
    graph_database_username="",
    graph_database_password="",
    graph_database_port="",
+    graph_database_key="",
 ):
    """
    Create a graph engine based on the specified provider type.
@ -69,6 +70,7 @@ def create_graph_engine(
            graph_database_url=graph_database_url,
            graph_database_username=graph_database_username,
            graph_database_password=graph_database_password,
+            database_name=graph_database_name,
        )

    if graph_database_provider == "neo4j":
--- a/cognee/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognee/infrastructure/databases/graph/graph_db_interface.py
@ -398,3 +398,18 @@ class GraphDBInterface(ABC):
            - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
        """
        raise NotImplementedError
+
+    @abstractmethod
+    async def get_filtered_graph_data(
+        self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
+    ) -> Tuple[List[Node], List[EdgeData]]:
+        """
+        Retrieve nodes and edges filtered by the provided attribute criteria.
+
+        Parameters:
+        -----------
+
+            - attribute_filters: A list of dictionaries where keys are attribute names and values
+              are lists of attribute values to filter by.
+        """
+        raise NotImplementedError
--- a/cognee/infrastructure/databases/graph/kuzu/adapter.py
+++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py
@ -12,6 +12,7 @@ from contextlib import asynccontextmanager
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, Any, List, Union, Optional, Tuple, Type

+from cognee.exceptions import CogneeValidationError
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.utils.run_sync import run_sync
 from cognee.infrastructure.files.storage import get_file_storage
@ -1186,6 +1187,11 @@ class KuzuAdapter(GraphDBInterface):
              A tuple with two elements: a list of tuples of (node_id, properties) and a list of
              tuples of (source_id, target_id, relationship_name, properties).
        """
+
+        import time
+
+        start_time = time.time()
+
        try:
            nodes_query = """
            MATCH (n:Node)
@ -1249,6 +1255,11 @@ class KuzuAdapter(GraphDBInterface):
                            },
                        )
                    )
+
+            retrieval_time = time.time() - start_time
+            logger.info(
+                f"Retrieved {len(nodes)} nodes and {len(edges)} edges in {retrieval_time:.2f} seconds"
+            )
            return formatted_nodes, formatted_edges
        except Exception as e:
            logger.error(f"Failed to get graph data: {e}")
@ -1417,6 +1428,92 @@ class KuzuAdapter(GraphDBInterface):
                formatted_edges.append((source_id, target_id, rel_type, props))
        return formatted_nodes, formatted_edges

+    async def get_id_filtered_graph_data(self, target_ids: list[str]):
+        """
+        Retrieve graph data filtered by specific node IDs, including their direct neighbors
+        and only edges where one endpoint matches those IDs.
+
+        Returns:
+            nodes: List[dict]   -> Each dict includes "id" and all node properties
+            edges: List[dict]   -> Each dict includes "source", "target", "type", "properties"
+        """
+        import time
+
+        start_time = time.time()
+
+        try:
+            if not target_ids:
+                logger.warning("No target IDs provided for ID-filtered graph retrieval.")
+                return [], []
+
+            if not all(isinstance(x, str) for x in target_ids):
+                raise CogneeValidationError("target_ids must be a list of strings")
+
+            query = """
+            MATCH (n:Node)-[r]->(m:Node)
+            WHERE n.id IN $target_ids OR m.id IN $target_ids
+            RETURN n.id, {
+                name: n.name,
+                type: n.type,
+                properties: n.properties
+            }, m.id, {
+                name: m.name,
+                type: m.type,
+                properties: m.properties
+            }, r.relationship_name, r.properties
+            """
+
+            result = await self.query(query, {"target_ids": target_ids})
+
+            if not result:
+                logger.info("No data returned for the supplied IDs")
+                return [], []
+
+            nodes_dict = {}
+            edges = []
+
+            for n_id, n_props, m_id, m_props, r_type, r_props_raw in result:
+                if n_props.get("properties"):
+                    try:
+                        additional_props = json.loads(n_props["properties"])
+                        n_props.update(additional_props)
+                        del n_props["properties"]
+                    except json.JSONDecodeError:
+                        logger.warning(f"Failed to parse properties JSON for node {n_id}")
+
+                if m_props.get("properties"):
+                    try:
+                        additional_props = json.loads(m_props["properties"])
+                        m_props.update(additional_props)
+                        del m_props["properties"]
+                    except json.JSONDecodeError:
+                        logger.warning(f"Failed to parse properties JSON for node {m_id}")
+
+                nodes_dict[n_id] = (n_id, n_props)
+                nodes_dict[m_id] = (m_id, m_props)
+
+                edge_props = {}
+                if r_props_raw:
+                    try:
+                        edge_props = json.loads(r_props_raw)
+                    except (json.JSONDecodeError, TypeError):
+                        logger.warning(f"Failed to parse edge properties for {n_id}->{m_id}")
+
+                source_id = edge_props.get("source_node_id", n_id)
+                target_id = edge_props.get("target_node_id", m_id)
+                edges.append((source_id, target_id, r_type, edge_props))
+
+            retrieval_time = time.time() - start_time
+            logger.info(
+                f"ID-filtered retrieval: {len(nodes_dict)} nodes and {len(edges)} edges in {retrieval_time:.2f}s"
+            )
+
+            return list(nodes_dict.values()), edges
+
+        except Exception as e:
+            logger.error(f"Error during ID-filtered graph data retrieval: {str(e)}")
+            raise
+
    async def get_graph_metrics(self, include_optional=False) -> Dict[str, Any]:
        """
        Get metrics on graph structure and connectivity.
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@ -964,6 +964,63 @@ class Neo4jAdapter(GraphDBInterface):
            logger.error(f"Error during graph data retrieval: {str(e)}")
            raise

+    async def get_id_filtered_graph_data(self, target_ids: list[str]):
+        """
+        Retrieve graph data filtered by specific node IDs, including their direct neighbors
+        and only edges where one endpoint matches those IDs.
+
+        This version uses a single Cypher query for efficiency.
+        """
+        import time
+
+        start_time = time.time()
+
+        try:
+            if not target_ids:
+                logger.warning("No target IDs provided for ID-filtered graph retrieval.")
+                return [], []
+
+            query = """
+            MATCH ()-[r]-()
+            WHERE startNode(r).id IN $target_ids
+               OR endNode(r).id IN $target_ids
+            WITH DISTINCT r, startNode(r) AS a, endNode(r) AS b
+            RETURN
+                properties(a) AS n_properties,
+                properties(b) AS m_properties,
+                type(r) AS type,
+                properties(r) AS properties
+            """
+
+            result = await self.query(query, {"target_ids": target_ids})
+
+            nodes_dict = {}
+            edges = []
+
+            for record in result:
+                n_props = record["n_properties"]
+                m_props = record["m_properties"]
+                r_props = record["properties"]
+                r_type = record["type"]
+
+                nodes_dict[n_props["id"]] = (n_props["id"], n_props)
+                nodes_dict[m_props["id"]] = (m_props["id"], m_props)
+
+                source_id = r_props.get("source_node_id", n_props["id"])
+                target_id = r_props.get("target_node_id", m_props["id"])
+                edges.append((source_id, target_id, r_type, r_props))
+
+            retrieval_time = time.time() - start_time
+            logger.info(
+                f"ID-filtered retrieval: {len(nodes_dict)} nodes and {len(edges)} edges in {retrieval_time:.2f}s"
+            )
+
+            return list(nodes_dict.values()), edges
+
+        except Exception as e:
+            logger.error(f"Error during ID-filtered graph data retrieval: {str(e)}")
+            raise
+
    async def get_nodeset_subgraph(
        self, node_type: Type[Any], node_name: List[str]
    ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
--- a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py
+++ b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py
@ -416,6 +416,15 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
        self._client.query(f"MATCH (n :{self._VECTOR_NODE_LABEL}) DETACH DELETE n")
        pass

+    async def is_empty(self) -> bool:
+        query = """
+        MATCH (n)
+        RETURN true
+        LIMIT 1;
+        """
+        query_result = await self._client.query(query)
+        return len(query_result) == 0
+
    @staticmethod
    def _get_scored_result(
        item: dict, with_vector: bool = False, with_score: bool = False
--- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py
+++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py
@ -1,11 +1,15 @@
+import os
 from uuid import UUID
 from typing import Union

 from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
-from cognee.modules.data.methods import create_dataset

+from cognee.base_config import get_base_config
+from cognee.modules.data.methods import create_dataset
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.modules.data.methods import get_unique_dataset_id
 from cognee.modules.users.models import DatasetDatabase
 from cognee.modules.users.models import User
@ -32,8 +36,32 @@ async def get_or_create_dataset_database(

    dataset_id = await get_unique_dataset_id(dataset, user)

-    vector_db_name = f"{dataset_id}.lance.db"
-    graph_db_name = f"{dataset_id}.pkl"
+    vector_config = get_vectordb_config()
+    graph_config = get_graph_config()
+
+    # Note: for hybrid databases both graph and vector DB name have to be the same
+    if graph_config.graph_database_provider == "kuzu":
+        graph_db_name = f"{dataset_id}.pkl"
+    else:
+        graph_db_name = f"{dataset_id}"
+
+    if vector_config.vector_db_provider == "lancedb":
+        vector_db_name = f"{dataset_id}.lance.db"
+    else:
+        vector_db_name = f"{dataset_id}"
+
+    base_config = get_base_config()
+    databases_directory_path = os.path.join(
+        base_config.system_root_directory, "databases", str(user.id)
+    )
+
+    # Determine vector database URL
+    if vector_config.vector_db_provider == "lancedb":
+        vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name)
+    else:
+        vector_db_url = vector_config.vector_database_url
+
+    # Determine graph database URL

    async with db_engine.get_async_session() as session:
        # Create dataset if it doesn't exist
@ -55,6 +83,12 @@ async def get_or_create_dataset_database(
            dataset_id=dataset_id,
            vector_database_name=vector_db_name,
            graph_database_name=graph_db_name,
+            vector_database_provider=vector_config.vector_db_provider,
+            graph_database_provider=graph_config.graph_database_provider,
+            vector_database_url=vector_db_url,
+            graph_database_url=graph_config.graph_database_url,
+            vector_database_key=vector_config.vector_db_key,
+            graph_database_key=graph_config.graph_database_key,
        )

        try:
--- a/cognee/infrastructure/databases/vector/config.py
+++ b/cognee/infrastructure/databases/vector/config.py
@ -18,12 +18,14 @@ class VectorConfig(BaseSettings):
    Instance variables:
    - vector_db_url: The URL of the vector database.
    - vector_db_port: The port for the vector database.
+    - vector_db_name: The name of the vector database.
    - vector_db_key: The key for accessing the vector database.
    - vector_db_provider: The provider for the vector database.
    """

    vector_db_url: str = ""
    vector_db_port: int = 1234
+    vector_db_name: str = ""
    vector_db_key: str = ""
    vector_db_provider: str = "lancedb"

@ -58,6 +60,7 @@ class VectorConfig(BaseSettings):
        return {
            "vector_db_url": self.vector_db_url,
            "vector_db_port": self.vector_db_port,
+            "vector_db_name": self.vector_db_name,
            "vector_db_key": self.vector_db_key,
            "vector_db_provider": self.vector_db_provider,
        }
--- a/cognee/infrastructure/databases/vector/create_vector_engine.py
+++ b/cognee/infrastructure/databases/vector/create_vector_engine.py
@ -1,5 +1,6 @@
 from .supported_databases import supported_databases
 from .embeddings import get_embedding_engine
+from cognee.infrastructure.databases.graph.config import get_graph_context_config

 from functools import lru_cache

@ -8,6 +9,7 @@ from functools import lru_cache
 def create_vector_engine(
    vector_db_provider: str,
    vector_db_url: str,
+    vector_db_name: str,
    vector_db_port: str = "",
    vector_db_key: str = "",
 ):
@ -27,6 +29,7 @@ def create_vector_engine(
        - vector_db_url (str): The URL for the vector database instance.
        - vector_db_port (str): The port for the vector database instance. Required for some
          providers.
+        - vector_db_name (str): The name of the vector database instance.
        - vector_db_key (str): The API key or access token for the vector database instance.
        - vector_db_provider (str): The name of the vector database provider to use (e.g.,
          'pgvector').
@ -45,6 +48,7 @@ def create_vector_engine(
            url=vector_db_url,
            api_key=vector_db_key,
            embedding_engine=embedding_engine,
+            database_name=vector_db_name,
        )

    if vector_db_provider.lower() == "pgvector":
--- a/cognee/infrastructure/files/utils/guess_file_type.py
+++ b/cognee/infrastructure/files/utils/guess_file_type.py
@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type
        file_type = Type("text/plain", "txt")
        return file_type

+    if ext in [".csv"]:
+        file_type = Type("text/csv", "csv")
+        return file_type
+
    file_type = filetype.guess(file)

    # If file type could not be determined consider it a plain text file as they don't have magic number encoding
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
    """

    structured_output_framework: str = "instructor"
+    llm_instructor_mode: str = ""
    llm_provider: str = "openai"
    llm_model: str = "openai/gpt-5-mini"
    llm_endpoint: str = ""
@ -216,6 +217,7 @@ class LLMConfig(BaseSettings):
              instance.
        """
        return {
+            "llm_instructor_mode": self.llm_instructor_mode.lower(),
            "provider": self.llm_provider,
            "model": self.llm_model,
            "endpoint": self.llm_endpoint,
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
@ -28,13 +28,16 @@ class AnthropicAdapter(LLMInterface):

    name = "Anthropic"
    model: str
+    default_instructor_mode = "anthropic_tools"

-    def __init__(self, max_completion_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
        import anthropic

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.patch(
            create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
-            mode=instructor.Mode.ANTHROPIC_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
        )

        self.model = model
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface):
    name: str
    model: str
    api_key: str
+    default_instructor_mode = "json_mode"

    def __init__(
        self,
@ -49,6 +50,7 @@ class GeminiAdapter(LLMInterface):
        model: str,
        api_version: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
@ -63,7 +65,11 @@ class GeminiAdapter(LLMInterface):
        self.fallback_api_key = fallback_api_key
        self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

    @retry(
        stop=stop_after_delay(128),
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface):
    name: str
    model: str
    api_key: str
+    default_instructor_mode = "json_mode"

    def __init__(
        self,
@ -49,6 +50,7 @@ class GenericAPIAdapter(LLMInterface):
        model: str,
        name: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
@ -63,7 +65,11 @@ class GenericAPIAdapter(LLMInterface):
        self.fallback_api_key = fallback_api_key
        self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

    @retry(
        stop=stop_after_delay(128),
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
@ -81,6 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            model=llm_config.llm_model,
            transcription_model=llm_config.transcription_model,
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
            streaming=llm_config.llm_streaming,
            fallback_api_key=llm_config.fallback_api_key,
            fallback_endpoint=llm_config.fallback_endpoint,
@ -101,6 +102,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            llm_config.llm_model,
            "Ollama",
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.ANTHROPIC:
@ -109,7 +111,9 @@ def get_llm_client(raise_api_key_error: bool = True):
        )

        return AnthropicAdapter(
-            max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
+            max_completion_tokens=max_completion_tokens,
+            model=llm_config.llm_model,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.CUSTOM:
@ -126,6 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            llm_config.llm_model,
            "Custom",
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
            fallback_api_key=llm_config.fallback_api_key,
            fallback_endpoint=llm_config.fallback_endpoint,
            fallback_model=llm_config.fallback_model,
@ -145,6 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            max_completion_tokens=max_completion_tokens,
            endpoint=llm_config.llm_endpoint,
            api_version=llm_config.llm_api_version,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.MISTRAL:
@ -160,21 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            model=llm_config.llm_model,
            max_completion_tokens=max_completion_tokens,
            endpoint=llm_config.llm_endpoint,
-        )
-
-    elif provider == LLMProvider.MISTRAL:
-        if llm_config.llm_api_key is None:
-            raise LLMAPIKeyNotSetError()
-
-        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
-            MistralAdapter,
-        )
-
-        return MistralAdapter(
-            api_key=llm_config.llm_api_key,
-            model=llm_config.llm_model,
-            max_completion_tokens=max_completion_tokens,
-            endpoint=llm_config.llm_endpoint,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    else:
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
@ -37,16 +37,26 @@ class MistralAdapter(LLMInterface):
    model: str
    api_key: str
    max_completion_tokens: int
+    default_instructor_mode = "mistral_tools"

-    def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None):
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        max_completion_tokens: int,
+        endpoint: str = None,
+        instructor_mode: str = None,
+    ):
        from mistralai import Mistral

        self.model = model
        self.max_completion_tokens = max_completion_tokens

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.from_litellm(
            litellm.acompletion,
-            mode=instructor.Mode.MISTRAL_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
            api_key=get_llm_config().llm_api_key,
        )

--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
@ -42,8 +42,16 @@ class OllamaAPIAdapter(LLMInterface):
    - aclient
    """

+    default_instructor_mode = "json_mode"
+
    def __init__(
-        self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
+        self,
+        endpoint: str,
+        api_key: str,
+        model: str,
+        name: str,
+        max_completion_tokens: int,
+        instructor_mode: str = None,
    ):
        self.name = name
        self.model = model
@ -51,8 +59,11 @@ class OllamaAPIAdapter(LLMInterface):
        self.endpoint = endpoint
        self.max_completion_tokens = max_completion_tokens

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.from_openai(
-            OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
+            OpenAI(base_url=self.endpoint, api_key=self.api_key),
+            mode=instructor.Mode(self.instructor_mode),
        )

    @retry(
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
@ -56,6 +56,7 @@ class OpenAIAdapter(LLMInterface):
    model: str
    api_key: str
    api_version: str
+    default_instructor_mode = "json_schema_mode"

    MAX_RETRIES = 5

@ -69,19 +70,21 @@ class OpenAIAdapter(LLMInterface):
        model: str,
        transcription_model: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        streaming: bool = False,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
    ):
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
        # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs.
        #       Make sure all new gpt models will work with this mode as well.
        if "gpt-5" in model:
            self.aclient = instructor.from_litellm(
-                litellm.acompletion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
            )
            self.client = instructor.from_litellm(
-                litellm.completion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.completion, mode=instructor.Mode(self.instructor_mode)
            )
        else:
            self.aclient = instructor.from_litellm(litellm.acompletion)
--- a/cognee/infrastructure/loaders/LoaderEngine.py
+++ b/cognee/infrastructure/loaders/LoaderEngine.py
@ -31,6 +31,7 @@ class LoaderEngine:
            "pypdf_loader",
            "image_loader",
            "audio_loader",
+            "csv_loader",
            "unstructured_loader",
            "advanced_pdf_loader",
        ]
--- a/cognee/infrastructure/loaders/core/init.py
+++ b/cognee/infrastructure/loaders/core/init.py
@ -3,5 +3,6 @@
 from .text_loader import TextLoader
 from .audio_loader import AudioLoader
 from .image_loader import ImageLoader
+from .csv_loader import CsvLoader

-__all__ = ["TextLoader", "AudioLoader", "ImageLoader"]
+__all__ = ["TextLoader", "AudioLoader", "ImageLoader", "CsvLoader"]
--- a/cognee/infrastructure/loaders/core/csv_loader.py
+++ b/cognee/infrastructure/loaders/core/csv_loader.py
@ -0,0 +1,93 @@
+import os
+from typing import List
+import csv
+from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
+from cognee.infrastructure.files.storage import get_file_storage, get_storage_config
+from cognee.infrastructure.files.utils.get_file_metadata import get_file_metadata
+
+
+class CsvLoader(LoaderInterface):
+    """
+    Core CSV file loader that handles basic CSV file formats.
+    """
+
+    @property
+    def supported_extensions(self) -> List[str]:
+        """Supported text file extensions."""
+        return [
+            "csv",
+        ]
+
+    @property
+    def supported_mime_types(self) -> List[str]:
+        """Supported MIME types for text content."""
+        return [
+            "text/csv",
+        ]
+
+    @property
+    def loader_name(self) -> str:
+        """Unique identifier for this loader."""
+        return "csv_loader"
+
+    def can_handle(self, extension: str, mime_type: str) -> bool:
+        """
+        Check if this loader can handle the given file.
+
+        Args:
+            extension: File extension
+            mime_type: Optional MIME type
+
+        Returns:
+            True if file can be handled, False otherwise
+        """
+        if extension in self.supported_extensions and mime_type in self.supported_mime_types:
+            return True
+
+        return False
+
+    async def load(self, file_path: str, encoding: str = "utf-8", **kwargs):
+        """
+        Load and process the csv file.
+
+        Args:
+            file_path: Path to the file to load
+            encoding: Text encoding to use (default: utf-8)
+            **kwargs: Additional configuration (unused)
+
+        Returns:
+            LoaderResult containing the file content and metadata
+
+        Raises:
+            FileNotFoundError: If file doesn't exist
+            UnicodeDecodeError: If file cannot be decoded with specified encoding
+            OSError: If file cannot be read
+        """
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        with open(file_path, "rb") as f:
+            file_metadata = await get_file_metadata(f)
+        # Name ingested file of current loader based on original file content hash
+        storage_file_name = "text_" + file_metadata["content_hash"] + ".txt"
+
+        row_texts = []
+        row_index = 1
+
+        with open(file_path, "r", encoding=encoding, newline="") as file:
+            reader = csv.DictReader(file)
+            for row in reader:
+                pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()]
+                row_text = ", ".join(pairs)
+                row_texts.append(f"Row {row_index}:\n{row_text}\n")
+                row_index += 1
+
+        content = "\n".join(row_texts)
+
+        storage_config = get_storage_config()
+        data_root_directory = storage_config["data_root_directory"]
+        storage = get_file_storage(data_root_directory)
+
+        full_file_path = await storage.store(storage_file_name, content)
+
+        return full_file_path
--- a/cognee/infrastructure/loaders/core/text_loader.py
+++ b/cognee/infrastructure/loaders/core/text_loader.py
@ -16,7 +16,7 @@ class TextLoader(LoaderInterface):
    @property
    def supported_extensions(self) -> List[str]:
        """Supported text file extensions."""
-        return ["txt", "md", "csv", "json", "xml", "yaml", "yml", "log"]
+        return ["txt", "md", "json", "xml", "yaml", "yml", "log"]

    @property
    def supported_mime_types(self) -> List[str]:
@ -24,7 +24,6 @@ class TextLoader(LoaderInterface):
        return [
            "text/plain",
            "text/markdown",
-            "text/csv",
            "application/json",
            "text/xml",
            "application/xml",
--- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py
+++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py
@ -227,12 +227,3 @@ class AdvancedPdfLoader(LoaderInterface):
        if value is None:
            return ""
        return str(value).replace("\xa0", " ").strip()
-
-
-if __name__ == "__main__":
-    loader = AdvancedPdfLoader()
-    asyncio.run(
-        loader.load(
-            "/Users/xiaotao/work/cognee/cognee/infrastructure/loaders/external/attention_is_all_you_need.pdf"
-        )
-    )
--- a/cognee/infrastructure/loaders/supported_loaders.py
+++ b/cognee/infrastructure/loaders/supported_loaders.py
@ -1,5 +1,5 @@
 from cognee.infrastructure.loaders.external import PyPdfLoader
-from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader
+from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader, CsvLoader

 # Registry for loader implementations
 supported_loaders = {
@ -7,6 +7,7 @@ supported_loaders = {
    TextLoader.loader_name: TextLoader,
    ImageLoader.loader_name: ImageLoader,
    AudioLoader.loader_name: AudioLoader,
+    CsvLoader.loader_name: CsvLoader,
 }

 # Try adding optional loaders
--- a/cognee/modules/chunking/CsvChunker.py
+++ b/cognee/modules/chunking/CsvChunker.py
@ -0,0 +1,35 @@
+from cognee.shared.logging_utils import get_logger
+
+
+from cognee.tasks.chunks import chunk_by_row
+from cognee.modules.chunking.Chunker import Chunker
+from .models.DocumentChunk import DocumentChunk
+
+logger = get_logger()
+
+
+class CsvChunker(Chunker):
+    async def read(self):
+        async for content_text in self.get_text():
+            if content_text is None:
+                continue
+
+            for chunk_data in chunk_by_row(content_text, self.max_chunk_size):
+                if chunk_data["chunk_size"] <= self.max_chunk_size:
+                    yield DocumentChunk(
+                        id=chunk_data["chunk_id"],
+                        text=chunk_data["text"],
+                        chunk_size=chunk_data["chunk_size"],
+                        is_part_of=self.document,
+                        chunk_index=self.chunk_index,
+                        cut_type=chunk_data["cut_type"],
+                        contains=[],
+                        metadata={
+                            "index_fields": ["text"],
+                        },
+                    )
+                    self.chunk_index += 1
+                else:
+                    raise ValueError(
+                        f"Chunk size is larger than the maximum chunk size {self.max_chunk_size}"
+                    )
--- a/cognee/modules/data/methods/init.py
+++ b/cognee/modules/data/methods/init.py
@ -10,6 +10,7 @@ from .get_authorized_dataset import get_authorized_dataset
 from .get_authorized_dataset_by_name import get_authorized_dataset_by_name
 from .get_data import get_data
 from .get_unique_dataset_id import get_unique_dataset_id
+from .get_unique_data_id import get_unique_data_id
 from .get_authorized_existing_datasets import get_authorized_existing_datasets
 from .get_dataset_ids import get_dataset_ids

--- a/cognee/modules/data/methods/create_dataset.py
+++ b/cognee/modules/data/methods/create_dataset.py
@ -16,14 +16,16 @@ async def create_dataset(dataset_name: str, user: User, session: AsyncSession) -
            .options(joinedload(Dataset.data))
            .filter(Dataset.name == dataset_name)
            .filter(Dataset.owner_id == owner_id)
+            .filter(Dataset.tenant_id == user.tenant_id)
        )
    ).first()

    if dataset is None:
        # Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name
        dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user)
-        dataset = Dataset(id=dataset_id, name=dataset_name, data=[])
-        dataset.owner_id = owner_id
+        dataset = Dataset(
+            id=dataset_id, name=dataset_name, data=[], owner_id=owner_id, tenant_id=user.tenant_id
+        )

        session.add(dataset)

--- a/cognee/modules/data/methods/get_dataset_ids.py
+++ b/cognee/modules/data/methods/get_dataset_ids.py
@ -27,7 +27,11 @@ async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user):
            # Get all user owned dataset objects (If a user wants to write to a dataset he is not the owner of it must be provided through UUID.)
            user_datasets = await get_datasets(user.id)
            # Filter out non name mentioned datasets
-            dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets]
+            dataset_ids = [dataset for dataset in user_datasets if dataset.name in datasets]
+            # Filter out non current tenant datasets
+            dataset_ids = [
+                dataset.id for dataset in dataset_ids if dataset.tenant_id == user.tenant_id
+            ]
        else:
            raise DatasetTypeError(
                f"One or more of the provided dataset types is not handled: f{datasets}"
--- a/cognee/modules/data/methods/get_unique_data_id.py
+++ b/cognee/modules/data/methods/get_unique_data_id.py
@ -0,0 +1,68 @@
+from uuid import uuid5, NAMESPACE_OID, UUID
+from sqlalchemy import select
+
+from cognee.modules.data.models.Data import Data
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.models import User
+
+
+async def get_unique_data_id(data_identifier: str, user: User) -> UUID:
+    """
+    Function returns a unique UUID for data based on data identifier, user id and tenant id.
+    If data with legacy ID exists, return that ID to maintain compatibility.
+
+    Args:
+        data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.)
+        user: User object adding the data
+        tenant_id: UUID of the tenant for which data is being added
+
+    Returns:
+        UUID: Unique identifier for the data
+    """
+
+    def _get_deprecated_unique_data_id(data_identifier: str, user: User) -> UUID:
+        """
+        Deprecated function, returns a unique UUID for data based on data identifier and user id.
+        Needed to support legacy data without tenant information.
+        Args:
+            data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.)
+            user: User object adding the data
+
+        Returns:
+            UUID: Unique identifier for the data
+        """
+        # return UUID hash of file contents + owner id + tenant_id
+        return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}")
+
+    def _get_modern_unique_data_id(data_identifier: str, user: User) -> UUID:
+        """
+        Function returns a unique UUID for data based on data identifier, user id and tenant id.
+        Args:
+            data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.)
+            user: User object adding the data
+            tenant_id: UUID of the tenant for which data is being added
+
+        Returns:
+            UUID: Unique identifier for the data
+        """
+        # return UUID hash of file contents + owner id + tenant_id
+        return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(user.tenant_id)}")
+
+    # Get all possible data_id values
+    data_id = {
+        "modern_data_id": _get_modern_unique_data_id(data_identifier=data_identifier, user=user),
+        "legacy_data_id": _get_deprecated_unique_data_id(
+            data_identifier=data_identifier, user=user
+        ),
+    }
+
+    # Check if data item with legacy_data_id exists, if so use that one, else use modern_data_id
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        legacy_data_point = (
+            await session.execute(select(Data).filter(Data.id == data_id["legacy_data_id"]))
+        ).scalar_one_or_none()
+
+        if not legacy_data_point:
+            return data_id["modern_data_id"]
+        return data_id["legacy_data_id"]
--- a/cognee/modules/data/methods/get_unique_dataset_id.py
+++ b/cognee/modules/data/methods/get_unique_dataset_id.py
@ -1,9 +1,71 @@
 from uuid import UUID, uuid5, NAMESPACE_OID
-from cognee.modules.users.models import User
 from typing import Union
+from sqlalchemy import select
+
+from cognee.modules.data.models.Dataset import Dataset
+from cognee.modules.users.models import User
+from cognee.infrastructure.databases.relational import get_relational_engine


 async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
-    if isinstance(dataset_name, UUID):
-        return dataset_name
-    return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
+    """
+    Function returns a unique UUID for dataset based on dataset name, user id and tenant id.
+    If dataset with legacy ID exists, return that ID to maintain compatibility.
+
+    Args:
+        dataset_name: string representing the dataset name
+        user: User object adding the dataset
+        tenant_id: UUID of the tenant for which dataset is being added
+
+    Returns:
+        UUID: Unique identifier for the dataset
+    """
+
+    def _get_legacy_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
+        """
+        Legacy function, returns a unique UUID for dataset based on dataset name and user id.
+        Needed to support legacy datasets without tenant information.
+        Args:
+            dataset_name: string representing the dataset name
+            user: Current User object adding the dataset
+
+        Returns:
+            UUID: Unique identifier for the dataset
+        """
+        if isinstance(dataset_name, UUID):
+            return dataset_name
+        return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
+
+    def _get_modern_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
+        """
+        Returns a unique UUID for dataset based on dataset name, user id and tenant_id.
+        Args:
+            dataset_name: string representing the dataset name
+            user: Current User object adding the dataset
+            tenant_id: UUID of the tenant for which dataset is being added
+
+        Returns:
+            UUID: Unique identifier for the dataset
+        """
+        if isinstance(dataset_name, UUID):
+            return dataset_name
+        return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}{str(user.tenant_id)}")
+
+    # Get all possible dataset_id values
+    dataset_id = {
+        "modern_dataset_id": _get_modern_unique_dataset_id(dataset_name=dataset_name, user=user),
+        "legacy_dataset_id": _get_legacy_unique_dataset_id(dataset_name=dataset_name, user=user),
+    }
+
+    # Check if dataset with legacy_dataset_id exists, if so use that one, else use modern_dataset_id
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        legacy_dataset = (
+            await session.execute(
+                select(Dataset).filter(Dataset.id == dataset_id["legacy_dataset_id"])
+            )
+        ).scalar_one_or_none()
+
+        if not legacy_dataset:
+            return dataset_id["modern_dataset_id"]
+        return dataset_id["legacy_dataset_id"]
--- a/cognee/modules/data/models/Dataset.py
+++ b/cognee/modules/data/models/Dataset.py
@ -18,6 +18,7 @@ class Dataset(Base):
    updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))

    owner_id = Column(UUID, index=True)
+    tenant_id = Column(UUID, index=True, nullable=True)

    acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan")

@ -36,5 +37,6 @@ class Dataset(Base):
            "createdAt": self.created_at.isoformat(),
            "updatedAt": self.updated_at.isoformat() if self.updated_at else None,
            "ownerId": str(self.owner_id),
+            "tenantId": str(self.tenant_id),
            "data": [data.to_json() for data in self.data],
        }
--- a/cognee/modules/data/processing/document_types/CsvDocument.py
+++ b/cognee/modules/data/processing/document_types/CsvDocument.py
@ -0,0 +1,33 @@
+import io
+import csv
+from typing import Type
+
+from cognee.modules.chunking.Chunker import Chunker
+from cognee.infrastructure.files.utils.open_data_file import open_data_file
+from .Document import Document
+
+
+class CsvDocument(Document):
+    type: str = "csv"
+    mime_type: str = "text/csv"
+
+    async def read(self, chunker_cls: Type[Chunker], max_chunk_size: int):
+        async def get_text():
+            async with open_data_file(
+                self.raw_data_location, mode="r", encoding="utf-8", newline=""
+            ) as file:
+                content = file.read()
+                file_like_obj = io.StringIO(content)
+                reader = csv.DictReader(file_like_obj)
+
+                for row in reader:
+                    pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()]
+                    row_text = ", ".join(pairs)
+                    if not row_text.strip():
+                        break
+                    yield row_text
+
+        chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text)
+
+        async for chunk in chunker.read():
+            yield chunk
--- a/cognee/modules/data/processing/document_types/init.py
+++ b/cognee/modules/data/processing/document_types/init.py
@ -4,3 +4,4 @@ from .TextDocument import TextDocument
 from .ImageDocument import ImageDocument
 from .AudioDocument import AudioDocument
 from .UnstructuredDocument import UnstructuredDocument
+from .CsvDocument import CsvDocument
--- a/cognee/modules/graph/cognee_graph/CogneeGraph.py
+++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py
@ -56,6 +56,68 @@ class CogneeGraph(CogneeAbstractGraph):
    def get_edges(self) -> List[Edge]:
        return self.edges

+    async def _get_nodeset_subgraph(
+        self,
+        adapter,
+        node_type,
+        node_name,
+    ):
+        """Retrieve subgraph based on node type and name."""
+        logger.info("Retrieving graph filtered by node type and node name (NodeSet).")
+        nodes_data, edges_data = await adapter.get_nodeset_subgraph(
+            node_type=node_type, node_name=node_name
+        )
+        if not nodes_data or not edges_data:
+            raise EntityNotFoundError(
+                message="Nodeset does not exist, or empty nodeset projected from the database."
+            )
+        return nodes_data, edges_data
+
+    async def _get_full_or_id_filtered_graph(
+        self,
+        adapter,
+        relevant_ids_to_filter,
+    ):
+        """Retrieve full or ID-filtered graph with fallback."""
+        if relevant_ids_to_filter is None:
+            logger.info("Retrieving full graph.")
+            nodes_data, edges_data = await adapter.get_graph_data()
+            if not nodes_data or not edges_data:
+                raise EntityNotFoundError(message="Empty graph projected from the database.")
+            return nodes_data, edges_data
+
+        get_graph_data_fn = getattr(adapter, "get_id_filtered_graph_data", adapter.get_graph_data)
+        if getattr(adapter.__class__, "get_id_filtered_graph_data", None):
+            logger.info("Retrieving ID-filtered graph from database.")
+            nodes_data, edges_data = await get_graph_data_fn(target_ids=relevant_ids_to_filter)
+        else:
+            logger.info("Retrieving full graph from database.")
+            nodes_data, edges_data = await get_graph_data_fn()
+        if hasattr(adapter, "get_id_filtered_graph_data") and (not nodes_data or not edges_data):
+            logger.warning(
+                "Id filtered graph returned empty, falling back to full graph retrieval."
+            )
+            logger.info("Retrieving full graph")
+            nodes_data, edges_data = await adapter.get_graph_data()
+
+        if not nodes_data or not edges_data:
+            raise EntityNotFoundError("Empty graph projected from the database.")
+        return nodes_data, edges_data
+
+    async def _get_filtered_graph(
+        self,
+        adapter,
+        memory_fragment_filter,
+    ):
+        """Retrieve graph filtered by attributes."""
+        logger.info("Retrieving graph filtered by memory fragment")
+        nodes_data, edges_data = await adapter.get_filtered_graph_data(
+            attribute_filters=memory_fragment_filter
+        )
+        if not nodes_data or not edges_data:
+            raise EntityNotFoundError(message="Empty filtered graph projected from the database.")
+        return nodes_data, edges_data
+
    async def project_graph_from_db(
        self,
        adapter: Union[GraphDBInterface],
@ -67,40 +129,39 @@ class CogneeGraph(CogneeAbstractGraph):
        memory_fragment_filter=[],
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
+        relevant_ids_to_filter: Optional[List[str]] = None,
+        triplet_distance_penalty: float = 3.5,
    ) -> None:
        if node_dimension < 1 or edge_dimension < 1:
            raise InvalidDimensionsError()
        try:
+            if node_type is not None and node_name not in [None, [], ""]:
+                nodes_data, edges_data = await self._get_nodeset_subgraph(
+                    adapter, node_type, node_name
+                )
+            elif len(memory_fragment_filter) == 0:
+                nodes_data, edges_data = await self._get_full_or_id_filtered_graph(
+                    adapter, relevant_ids_to_filter
+                )
+            else:
+                nodes_data, edges_data = await self._get_filtered_graph(
+                    adapter, memory_fragment_filter
+                )
+
            import time

            start_time = time.time()
-
-            # Determine projection strategy
-            if node_type is not None and node_name not in [None, [], ""]:
-                nodes_data, edges_data = await adapter.get_nodeset_subgraph(
-                    node_type=node_type, node_name=node_name
-                )
-                if not nodes_data or not edges_data:
-                    raise EntityNotFoundError(
-                        message="Nodeset does not exist, or empty nodetes projected from the database."
-                    )
-            elif len(memory_fragment_filter) == 0:
-                nodes_data, edges_data = await adapter.get_graph_data()
-                if not nodes_data or not edges_data:
-                    raise EntityNotFoundError(message="Empty graph projected from the database.")
-            else:
-                nodes_data, edges_data = await adapter.get_filtered_graph_data(
-                    attribute_filters=memory_fragment_filter
-                )
-                if not nodes_data or not edges_data:
-                    raise EntityNotFoundError(
-                        message="Empty filtered graph projected from the database."
-                    )
-
            # Process nodes
            for node_id, properties in nodes_data:
                node_attributes = {key: properties.get(key) for key in node_properties_to_project}
-                self.add_node(Node(str(node_id), node_attributes, dimension=node_dimension))
+                self.add_node(
+                    Node(
+                        str(node_id),
+                        node_attributes,
+                        dimension=node_dimension,
+                        node_penalty=triplet_distance_penalty,
+                    )
+                )

            # Process edges
            for source_id, target_id, relationship_type, properties in edges_data:
@ -118,6 +179,7 @@ class CogneeGraph(CogneeAbstractGraph):
                        attributes=edge_attributes,
                        directed=directed,
                        dimension=edge_dimension,
+                        edge_penalty=triplet_distance_penalty,
                    )
                    self.add_edge(edge)

--- a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py
+++ b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py
@ -20,13 +20,17 @@ class Node:
    status: np.ndarray

    def __init__(
-        self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
+        self,
+        node_id: str,
+        attributes: Optional[Dict[str, Any]] = None,
+        dimension: int = 1,
+        node_penalty: float = 3.5,
    ):
        if dimension <= 0:
            raise InvalidDimensionsError()
        self.id = node_id
        self.attributes = attributes if attributes is not None else {}
-        self.attributes["vector_distance"] = float("inf")
+        self.attributes["vector_distance"] = node_penalty
        self.skeleton_neighbours = []
        self.skeleton_edges = []
        self.status = np.ones(dimension, dtype=int)
@ -105,13 +109,14 @@ class Edge:
        attributes: Optional[Dict[str, Any]] = None,
        directed: bool = True,
        dimension: int = 1,
+        edge_penalty: float = 3.5,
    ):
        if dimension <= 0:
            raise InvalidDimensionsError()
        self.node1 = node1
        self.node2 = node2
        self.attributes = attributes if attributes is not None else {}
-        self.attributes["vector_distance"] = float("inf")
+        self.attributes["vector_distance"] = edge_penalty
        self.directed = directed
        self.status = np.ones(dimension, dtype=int)

--- a/cognee/modules/ingestion/identify.py
+++ b/cognee/modules/ingestion/identify.py
@ -1,11 +1,11 @@
-from uuid import uuid5, NAMESPACE_OID
+from uuid import UUID
 from .data_types import IngestionData

 from cognee.modules.users.models import User
+from cognee.modules.data.methods import get_unique_data_id


-def identify(data: IngestionData, user: User) -> str:
+async def identify(data: IngestionData, user: User) -> UUID:
    data_content_hash: str = data.get_identifier()

-    # return UUID hash of file contents + owner id
-    return uuid5(NAMESPACE_OID, f"{data_content_hash}{user.id}")
+    return await get_unique_data_id(data_identifier=data_content_hash, user=user)
--- a/cognee/modules/notebooks/operations/run_in_local_sandbox.py
+++ b/cognee/modules/notebooks/operations/run_in_local_sandbox.py
@ -2,6 +2,8 @@ import io
 import sys
 import traceback

+import cognee
+

 def wrap_in_async_handler(user_code: str) -> str:
    return (
@ -34,6 +36,7 @@ def run_in_local_sandbox(code, environment=None, loop=None):

    environment["print"] = customPrintFunction
    environment["running_loop"] = loop
+    environment["cognee"] = cognee

    try:
        exec(code, environment)
--- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
+++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
@ -2,7 +2,7 @@ import os
 import difflib
 from cognee.shared.logging_utils import get_logger
 from collections import deque
-from typing import List, Tuple, Dict, Optional, Any, Union
+from typing import List, Tuple, Dict, Optional, Any, Union, IO
 from rdflib import Graph, URIRef, RDF, RDFS, OWL

 from cognee.modules.ontology.exceptions import (
@ -26,44 +26,76 @@ class RDFLibOntologyResolver(BaseOntologyResolver):

    def __init__(
        self,
-        ontology_file: Optional[Union[str, List[str]]] = None,
+        ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
        matching_strategy: Optional[MatchingStrategy] = None,
    ) -> None:
        super().__init__(matching_strategy)
        self.ontology_file = ontology_file
        try:
-            files_to_load = []
+            self.graph = None
            if ontology_file is not None:
-                if isinstance(ontology_file, str):
+                files_to_load = []
+                file_objects = []
+
+                if hasattr(ontology_file, "read"):
+                    file_objects = [ontology_file]
+                elif isinstance(ontology_file, str):
                    files_to_load = [ontology_file]
                elif isinstance(ontology_file, list):
-                    files_to_load = ontology_file
+                    if all(hasattr(item, "read") for item in ontology_file):
+                        file_objects = ontology_file
+                    else:
+                        files_to_load = ontology_file
                else:
                    raise ValueError(
-                        f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
+                        f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
                    )

-            if files_to_load:
-                self.graph = Graph()
-                loaded_files = []
-                for file_path in files_to_load:
-                    if os.path.exists(file_path):
-                        self.graph.parse(file_path)
-                        loaded_files.append(file_path)
-                        logger.info("Ontology loaded successfully from file: %s", file_path)
-                    else:
-                        logger.warning(
-                            "Ontology file '%s' not found. Skipping this file.",
-                            file_path,
+                if file_objects:
+                    self.graph = Graph()
+                    loaded_objects = []
+                    for file_obj in file_objects:
+                        try:
+                            content = file_obj.read()
+                            self.graph.parse(data=content, format="xml")
+                            loaded_objects.append(file_obj)
+                            logger.info("Ontology loaded successfully from file object")
+                        except Exception as e:
+                            logger.warning("Failed to parse ontology file object: %s", str(e))
+
+                    if not loaded_objects:
+                        logger.info(
+                            "No valid ontology file objects found. No owl ontology will be attached to the graph."
                        )
+                        self.graph = None
+                    else:
+                        logger.info("Total ontology file objects loaded: %d", len(loaded_objects))

-                if not loaded_files:
-                    logger.info(
-                        "No valid ontology files found. No owl ontology will be attached to the graph."
-                    )
-                    self.graph = None
+                elif files_to_load:
+                    self.graph = Graph()
+                    loaded_files = []
+                    for file_path in files_to_load:
+                        if os.path.exists(file_path):
+                            self.graph.parse(file_path)
+                            loaded_files.append(file_path)
+                            logger.info("Ontology loaded successfully from file: %s", file_path)
+                        else:
+                            logger.warning(
+                                "Ontology file '%s' not found. Skipping this file.",
+                                file_path,
+                            )
+
+                    if not loaded_files:
+                        logger.info(
+                            "No valid ontology files found. No owl ontology will be attached to the graph."
+                        )
+                        self.graph = None
+                    else:
+                        logger.info("Total ontology files loaded: %d", len(loaded_files))
                else:
-                    logger.info("Total ontology files loaded: %d", len(loaded_files))
+                    logger.info(
+                        "No ontology file provided. No owl ontology will be attached to the graph."
+                    )
            else:
                logger.info(
                    "No ontology file provided. No owl ontology will be attached to the graph."
--- a/cognee/modules/pipelines/operations/run_tasks_data_item.py
+++ b/cognee/modules/pipelines/operations/run_tasks_data_item.py
@ -69,7 +69,7 @@ async def run_tasks_data_item_incremental(
        async with open_data_file(file_path) as file:
            classified_data = ingestion.classify(file)
            # data_id is the hash of file contents + owner id to avoid duplicate data
-            data_id = ingestion.identify(classified_data, user)
+            data_id = await ingestion.identify(classified_data, user)
    else:
        # If data was already processed by Cognee get data id
        data_id = data_item.id
--- a/cognee/modules/retrieval/cypher_search_retriever.py
+++ b/cognee/modules/retrieval/cypher_search_retriever.py
@ -1,4 +1,6 @@
 from typing import Any, Optional
+from fastapi.encoders import jsonable_encoder
+
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.utils.completion import generate_completion
@ -50,7 +52,7 @@ class CypherSearchRetriever(BaseRetriever):
                logger.warning("Search attempt on an empty knowledge graph")
                return []

-            result = await graph_engine.query(query)
+            result = jsonable_encoder(await graph_engine.query(query))
        except Exception as e:
            logger.error("Failed to execture cypher search retrieval: %s", str(e))
            raise CypherSearchError() from e
--- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py
@ -39,6 +39,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
        save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ):
        super().__init__(
            user_prompt_path=user_prompt_path,
@ -48,6 +50,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
            node_name=node_name,
            save_interaction=save_interaction,
            system_prompt=system_prompt,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )

    async def get_completion(
--- a/cognee/modules/retrieval/graph_completion_cot_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py
@ -65,6 +65,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
        save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ):
        super().__init__(
            user_prompt_path=user_prompt_path,
@ -74,6 +76,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
            node_type=node_type,
            node_name=node_name,
            save_interaction=save_interaction,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
        self.validation_system_prompt_path = validation_system_prompt_path
        self.validation_user_prompt_path = validation_user_prompt_path
--- a/cognee/modules/retrieval/graph_completion_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_retriever.py
@ -47,6 +47,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
        save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ):
        """Initialize retriever with prompt paths and search parameters."""
        self.save_interaction = save_interaction
@ -54,8 +56,10 @@ class GraphCompletionRetriever(BaseGraphRetriever):
        self.system_prompt_path = system_prompt_path
        self.system_prompt = system_prompt
        self.top_k = top_k if top_k is not None else 5
+        self.wide_search_top_k = wide_search_top_k
        self.node_type = node_type
        self.node_name = node_name
+        self.triplet_distance_penalty = triplet_distance_penalty

    async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
        """
@ -105,6 +109,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
            collections=vector_index_collections or None,
            node_type=self.node_type,
            node_name=self.node_name,
+            wide_search_top_k=self.wide_search_top_k,
+            triplet_distance_penalty=self.triplet_distance_penalty,
        )

        return found_triplets
@ -141,6 +147,10 @@ class GraphCompletionRetriever(BaseGraphRetriever):

        return triplets

+    async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
+        context = await self.resolve_edges_to_text(triplets)
+        return context
+
    async def get_completion(
        self,
        query: str,
--- a/cognee/modules/retrieval/graph_summary_completion_retriever.py
+++ b/cognee/modules/retrieval/graph_summary_completion_retriever.py
@ -26,6 +26,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
        save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ):
        """Initialize retriever with default prompt paths and search parameters."""
        super().__init__(
@ -36,6 +38,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
            node_name=node_name,
            save_interaction=save_interaction,
            system_prompt=system_prompt,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
        self.summarize_prompt_path = summarize_prompt_path

--- a/cognee/modules/retrieval/temporal_retriever.py
+++ b/cognee/modules/retrieval/temporal_retriever.py
@ -47,6 +47,8 @@ class TemporalRetriever(GraphCompletionRetriever):
        top_k: Optional[int] = 5,
        node_type: Optional[Type] = None,
        node_name: Optional[List[str]] = None,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ):
        super().__init__(
            user_prompt_path=user_prompt_path,
@ -54,6 +56,8 @@ class TemporalRetriever(GraphCompletionRetriever):
            top_k=top_k,
            node_type=node_type,
            node_name=node_name,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
        self.user_prompt_path = user_prompt_path
        self.system_prompt_path = system_prompt_path
--- a/cognee/modules/retrieval/utils/brute_force_triplet_search.py
+++ b/cognee/modules/retrieval/utils/brute_force_triplet_search.py
@ -58,6 +58,8 @@ async def get_memory_fragment(
    properties_to_project: Optional[List[str]] = None,
    node_type: Optional[Type] = None,
    node_name: Optional[List[str]] = None,
+    relevant_ids_to_filter: Optional[List[str]] = None,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> CogneeGraph:
    """Creates and initializes a CogneeGraph memory fragment with optional property projections."""
    if properties_to_project is None:
@ -74,6 +76,8 @@ async def get_memory_fragment(
            edge_properties_to_project=["relationship_name", "edge_text"],
            node_type=node_type,
            node_name=node_name,
+            relevant_ids_to_filter=relevant_ids_to_filter,
+            triplet_distance_penalty=triplet_distance_penalty,
        )

    except EntityNotFoundError:
@ -95,6 +99,8 @@ async def brute_force_triplet_search(
    memory_fragment: Optional[CogneeGraph] = None,
    node_type: Optional[Type] = None,
    node_name: Optional[List[str]] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> List[Edge]:
    """
    Performs a brute force search to retrieve the top triplets from the graph.
@ -107,6 +113,8 @@ async def brute_force_triplet_search(
        memory_fragment (Optional[CogneeGraph]): Existing memory fragment to reuse.
        node_type: node type to filter
        node_name: node name to filter
+        wide_search_top_k (Optional[int]): Number of initial elements to retrieve from collections
+        triplet_distance_penalty (Optional[float]): Default distance penalty in graph projection

    Returns:
        list: The top triplet results.
@ -116,10 +124,10 @@ async def brute_force_triplet_search(
    if top_k <= 0:
        raise ValueError("top_k must be a positive integer.")

-    if memory_fragment is None:
-        memory_fragment = await get_memory_fragment(
-            properties_to_project, node_type=node_type, node_name=node_name
-        )
+    # Setting wide search limit based on the parameters
+    non_global_search = node_name is None
+
+    wide_search_limit = wide_search_top_k if non_global_search else None

    if collections is None:
        collections = [
@ -140,7 +148,7 @@ async def brute_force_triplet_search(
    async def search_in_collection(collection_name: str):
        try:
            return await vector_engine.search(
-                collection_name=collection_name, query_vector=query_vector, limit=None
+                collection_name=collection_name, query_vector=query_vector, limit=wide_search_limit
            )
        except CollectionNotFoundError:
            return []
@ -156,15 +164,38 @@ async def brute_force_triplet_search(
            return []

        # Final statistics
-        projection_time = time.time() - start_time
+        vector_collection_search_time = time.time() - start_time
        logger.info(
-            f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {projection_time:.2f}s"
+            f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {vector_collection_search_time:.2f}s"
        )

        node_distances = {collection: result for collection, result in zip(collections, results)}

        edge_distances = node_distances.get("EdgeType_relationship_name", None)

+        if wide_search_limit is not None:
+            relevant_ids_to_filter = list(
+                {
+                    str(getattr(scored_node, "id"))
+                    for collection_name, score_collection in node_distances.items()
+                    if collection_name != "EdgeType_relationship_name"
+                    and isinstance(score_collection, (list, tuple))
+                    for scored_node in score_collection
+                    if getattr(scored_node, "id", None)
+                }
+            )
+        else:
+            relevant_ids_to_filter = None
+
+        if memory_fragment is None:
+            memory_fragment = await get_memory_fragment(
+                properties_to_project=properties_to_project,
+                node_type=node_type,
+                node_name=node_name,
+                relevant_ids_to_filter=relevant_ids_to_filter,
+                triplet_distance_penalty=triplet_distance_penalty,
+            )
+
        await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
        await memory_fragment.map_vector_distances_to_graph_edges(
            vector_engine=vector_engine, query_vector=query_vector, edge_distances=edge_distances
--- a/cognee/modules/search/methods/get_search_type_tools.py
+++ b/cognee/modules/search/methods/get_search_type_tools.py
@ -37,6 +37,8 @@ async def get_search_type_tools(
    node_name: Optional[List[str]] = None,
    save_interaction: bool = False,
    last_k: Optional[int] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> list:
    search_tasks: dict[SearchType, List[Callable]] = {
        SearchType.SUMMARIES: [
@ -67,6 +69,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_completion,
            GraphCompletionRetriever(
                system_prompt_path=system_prompt_path,
@ -75,6 +79,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_context,
        ],
        SearchType.GRAPH_COMPLETION_COT: [
@ -85,6 +91,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_completion,
            GraphCompletionCotRetriever(
                system_prompt_path=system_prompt_path,
@ -93,6 +101,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_context,
        ],
        SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: [
@ -103,6 +113,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_completion,
            GraphCompletionContextExtensionRetriever(
                system_prompt_path=system_prompt_path,
@ -111,6 +123,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_context,
        ],
        SearchType.GRAPH_SUMMARY_COMPLETION: [
@ -121,6 +135,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_completion,
            GraphSummaryCompletionRetriever(
                system_prompt_path=system_prompt_path,
@ -129,6 +145,8 @@ async def get_search_type_tools(
                node_name=node_name,
                save_interaction=save_interaction,
                system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_context,
        ],
        SearchType.CODE: [
@ -145,8 +163,16 @@ async def get_search_type_tools(
        ],
        SearchType.FEEDBACK: [UserQAFeedback(last_k=last_k).add_feedback],
        SearchType.TEMPORAL: [
-            TemporalRetriever(top_k=top_k).get_completion,
-            TemporalRetriever(top_k=top_k).get_context,
+            TemporalRetriever(
+                top_k=top_k,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
+            ).get_completion,
+            TemporalRetriever(
+                top_k=top_k,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
+            ).get_context,
        ],
        SearchType.CHUNKS_LEXICAL: (
            lambda _r=JaccardChunksRetriever(top_k=top_k): [
--- a/cognee/modules/search/methods/no_access_control_search.py
+++ b/cognee/modules/search/methods/no_access_control_search.py
@ -24,6 +24,8 @@ async def no_access_control_search(
    last_k: Optional[int] = None,
    only_context: bool = False,
    session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
    search_tools = await get_search_type_tools(
        query_type=query_type,
@ -35,6 +37,8 @@ async def no_access_control_search(
        node_name=node_name,
        save_interaction=save_interaction,
        last_k=last_k,
+        wide_search_top_k=wide_search_top_k,
+        triplet_distance_penalty=triplet_distance_penalty,
    )
    graph_engine = await get_graph_engine()
    is_empty = await graph_engine.is_empty()
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@ -47,6 +47,8 @@ async def search(
    only_context: bool = False,
    use_combined_context: bool = False,
    session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
    """

@ -90,6 +92,8 @@ async def search(
            only_context=only_context,
            use_combined_context=use_combined_context,
            session_id=session_id,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
    else:
        search_results = [
@ -105,6 +109,8 @@ async def search(
                last_k=last_k,
                only_context=only_context,
                session_id=session_id,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            )
        ]

@ -172,6 +178,7 @@ async def search(
                            "search_result": [context] if context else None,
                            "dataset_id": datasets[0].id,
                            "dataset_name": datasets[0].name,
+                            "dataset_tenant_id": datasets[0].tenant_id,
                            "graphs": graphs,
                        }
                    )
@ -181,6 +188,7 @@ async def search(
                            "search_result": [result] if result else None,
                            "dataset_id": datasets[0].id,
                            "dataset_name": datasets[0].name,
+                            "dataset_tenant_id": datasets[0].tenant_id,
                            "graphs": graphs,
                        }
                    )
@ -217,6 +225,8 @@ async def authorized_search(
    only_context: bool = False,
    use_combined_context: bool = False,
    session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[
    Tuple[Any, Union[List[Edge], str], List[Dataset]],
    List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
@ -244,6 +254,8 @@ async def authorized_search(
            last_k=last_k,
            only_context=True,
            session_id=session_id,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )

        context = {}
@ -265,6 +277,8 @@ async def authorized_search(
            node_name=node_name,
            save_interaction=save_interaction,
            last_k=last_k,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
        search_tools = specific_search_tools
        if len(search_tools) == 2:
@ -304,6 +318,7 @@ async def authorized_search(
        last_k=last_k,
        only_context=only_context,
        session_id=session_id,
+        wide_search_top_k=wide_search_top_k,
    )

    return search_results
@ -323,6 +338,8 @@ async def search_in_datasets_context(
    only_context: bool = False,
    context: Optional[Any] = None,
    session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
    """
    Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@ -343,6 +360,8 @@ async def search_in_datasets_context(
        only_context: bool = False,
        context: Optional[Any] = None,
        session_id: Optional[str] = None,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
    ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
        # Set database configuration in async context for each dataset user has access for
        await set_database_global_context_variables(dataset.id, dataset.owner_id)
@ -376,6 +395,8 @@ async def search_in_datasets_context(
            node_name=node_name,
            save_interaction=save_interaction,
            last_k=last_k,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
        )
        search_tools = specific_search_tools
        if len(search_tools) == 2:
@ -411,6 +432,8 @@ async def search_in_datasets_context(
                only_context=only_context,
                context=context,
                session_id=session_id,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            )
        )

--- a/cognee/modules/users/methods/create_user.py
+++ b/cognee/modules/users/methods/create_user.py
@ -18,7 +18,6 @@ from typing import Optional
 async def create_user(
    email: str,
    password: str,
-    tenant_id: Optional[str] = None,
    is_superuser: bool = False,
    is_active: bool = True,
    is_verified: bool = False,
@ -30,37 +29,23 @@ async def create_user(
        async with relational_engine.get_async_session() as session:
            async with get_user_db_context(session) as user_db:
                async with get_user_manager_context(user_db) as user_manager:
-                    if tenant_id:
-                        # Check if the tenant already exists
-                        result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
-                        tenant = result.scalars().first()
-                        if not tenant:
-                            raise TenantNotFoundError
-
-                        user = await user_manager.create(
-                            UserCreate(
-                                email=email,
-                                password=password,
-                                tenant_id=tenant.id,
-                                is_superuser=is_superuser,
-                                is_active=is_active,
-                                is_verified=is_verified,
-                            )
-                        )
-                    else:
-                        user = await user_manager.create(
-                            UserCreate(
-                                email=email,
-                                password=password,
-                                is_superuser=is_superuser,
-                                is_active=is_active,
-                                is_verified=is_verified,
-                            )
+                    user = await user_manager.create(
+                        UserCreate(
+                            email=email,
+                            password=password,
+                            is_superuser=is_superuser,
+                            is_active=is_active,
+                            is_verified=is_verified,
                        )
+                    )

                    if auto_login:
                        await session.refresh(user)

+                    # Update tenants and roles information for User object
+                    _ = await user.awaitable_attrs.tenants
+                    _ = await user.awaitable_attrs.roles
+
                    return user
    except UserAlreadyExists as error:
        print(f"User {email} already exists")
--- a/cognee/modules/users/methods/get_default_user.py
+++ b/cognee/modules/users/methods/get_default_user.py
@ -18,7 +18,9 @@ async def get_default_user() -> User:
    try:
        async with db_engine.get_async_session() as session:
            query = (
-                select(User).options(selectinload(User.roles)).where(User.email == default_email)
+                select(User)
+                .options(selectinload(User.roles), selectinload(User.tenants))
+                .where(User.email == default_email)
            )

            result = await session.execute(query)
--- a/cognee/modules/users/methods/get_user.py
+++ b/cognee/modules/users/methods/get_user.py
@ -14,7 +14,7 @@ async def get_user(user_id: UUID):
        user = (
            await session.execute(
                select(User)
-                .options(selectinload(User.roles), selectinload(User.tenant))
+                .options(selectinload(User.roles), selectinload(User.tenants))
                .where(User.id == user_id)
            )
        ).scalar()
--- a/cognee/modules/users/methods/get_user_by_email.py
+++ b/cognee/modules/users/methods/get_user_by_email.py
@ -13,7 +13,7 @@ async def get_user_by_email(user_email: str):
        user = (
            await session.execute(
                select(User)
-                .options(joinedload(User.roles), joinedload(User.tenant))
+                .options(joinedload(User.roles), joinedload(User.tenants))
                .where(User.email == user_email)
            )
        ).scalar()
--- a/cognee/modules/users/models/DatasetDatabase.py
+++ b/cognee/modules/users/models/DatasetDatabase.py
@ -15,5 +15,14 @@ class DatasetDatabase(Base):
    vector_database_name = Column(String, unique=True, nullable=False)
    graph_database_name = Column(String, unique=True, nullable=False)

+    vector_database_provider = Column(String, unique=False, nullable=False)
+    graph_database_provider = Column(String, unique=False, nullable=False)
+
+    vector_database_url = Column(String, unique=False, nullable=True)
+    graph_database_url = Column(String, unique=False, nullable=True)
+
+    vector_database_key = Column(String, unique=False, nullable=True)
+    graph_database_key = Column(String, unique=False, nullable=True)
+
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
--- a/cognee/modules/users/models/Tenant.py
+++ b/cognee/modules/users/models/Tenant.py
@ -1,7 +1,7 @@
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, Mapped
 from sqlalchemy import Column, String, ForeignKey, UUID
 from .Principal import Principal
-from .User import User
+from .UserTenant import UserTenant
 from .Role import Role


@ -13,14 +13,13 @@ class Tenant(Principal):

    owner_id = Column(UUID, index=True)

-    # One-to-Many relationship with User; specify the join via User.tenant_id
-    users = relationship(
+    users: Mapped[list["User"]] = relationship(  # noqa: F821
        "User",
-        back_populates="tenant",
-        foreign_keys=lambda: [User.tenant_id],
+        secondary=UserTenant.__tablename__,
+        back_populates="tenants",
    )

-    # One-to-Many relationship with Role (if needed; similar fix)
+    # One-to-Many relationship with Role
    roles = relationship(
        "Role",
        back_populates="tenant",
--- a/cognee/modules/users/models/User.py
+++ b/cognee/modules/users/models/User.py
@ -6,8 +6,10 @@ from sqlalchemy import ForeignKey, Column, UUID
 from sqlalchemy.orm import relationship, Mapped

 from .Principal import Principal
+from .UserTenant import UserTenant
 from .UserRole import UserRole
 from .Role import Role
+from .Tenant import Tenant


 class User(SQLAlchemyBaseUserTableUUID, Principal):
@ -15,7 +17,7 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):

    id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), primary_key=True)

-    # Foreign key to Tenant (Many-to-One relationship)
+    # Foreign key to current Tenant (Many-to-One relationship)
    tenant_id = Column(UUID, ForeignKey("tenants.id"))

    # Many-to-Many Relationship with Roles
@ -25,11 +27,11 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
        back_populates="users",
    )

-    # Relationship to Tenant
-    tenant = relationship(
+    # Many-to-Many Relationship with Tenants user is a part of
+    tenants: Mapped[list["Tenant"]] = relationship(
        "Tenant",
+        secondary=UserTenant.__tablename__,
        back_populates="users",
-        foreign_keys=[tenant_id],
    )

    # ACL Relationship (One-to-Many)
@ -46,7 +48,6 @@ class UserRead(schemas.BaseUser[uuid_UUID]):


 class UserCreate(schemas.BaseUserCreate):
-    tenant_id: Optional[uuid_UUID] = None
    is_verified: bool = True


--- a/cognee/modules/users/models/UserTenant.py
+++ b/cognee/modules/users/models/UserTenant.py
@ -0,0 +1,12 @@
+from datetime import datetime, timezone
+from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base
+
+
+class UserTenant(Base):
+    __tablename__ = "user_tenants"
+
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+    user_id = Column(UUID, ForeignKey("users.id"), primary_key=True)
+    tenant_id = Column(UUID, ForeignKey("tenants.id"), primary_key=True)
--- a/cognee/modules/users/models/init.py
+++ b/cognee/modules/users/models/init.py
@ -1,6 +1,7 @@
 from .User import User
 from .Role import Role
 from .UserRole import UserRole
+from .UserTenant import UserTenant
 from .DatasetDatabase import DatasetDatabase
 from .RoleDefaultPermissions import RoleDefaultPermissions
 from .UserDefaultPermissions import UserDefaultPermissions
--- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
@ -1,11 +1,8 @@
-from types import SimpleNamespace
-
 from cognee.shared.logging_utils import get_logger

 from ...models.User import User
 from cognee.modules.data.models.Dataset import Dataset
 from cognee.modules.users.permissions.methods import get_principal_datasets
-from cognee.modules.users.permissions.methods import get_role, get_tenant

 logger = get_logger()

@ -25,17 +22,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
    # Get all datasets User has explicit access to
    datasets.extend(await get_principal_datasets(user, permission_type))

-    if user.tenant_id:
-        # Get all datasets all tenants have access to
-        tenant = await get_tenant(user.tenant_id)
+    # Get all tenants user is a part of
+    tenants = await user.awaitable_attrs.tenants
+    for tenant in tenants:
+        # Get all datasets all tenant members have access to
        datasets.extend(await get_principal_datasets(tenant, permission_type))

-        # Get all datasets Users roles have access to
-        if isinstance(user, SimpleNamespace):
-            # If simple namespace use roles defined in user
-            roles = user.roles
-        else:
-            roles = await user.awaitable_attrs.roles
+        # Get all datasets accessible by roles user is a part of
+        roles = await user.awaitable_attrs.roles
        for role in roles:
            datasets.extend(await get_principal_datasets(role, permission_type))

@ -45,4 +39,10 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
        # If the dataset id key already exists, leave the dictionary unchanged.
        unique.setdefault(dataset.id, dataset)

-    return list(unique.values())
+    # Filter out dataset that aren't part of the selected user's tenant
+    filtered_datasets = []
+    for dataset in list(unique.values()):
+        if dataset.tenant_id == user.tenant_id:
+            filtered_datasets.append(dataset)
+
+    return filtered_datasets
--- a/cognee/modules/users/roles/methods/add_user_to_role.py
+++ b/cognee/modules/users/roles/methods/add_user_to_role.py
@ -42,11 +42,13 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
            .first()
        )

+        user_tenants = await user.awaitable_attrs.tenants
+
        if not user:
            raise UserNotFoundError
        elif not role:
            raise RoleNotFoundError
-        elif user.tenant_id != role.tenant_id:
+        elif role.tenant_id not in [tenant.id for tenant in user_tenants]:
            raise TenantNotFoundError(
                message="User tenant does not match role tenant. User cannot be added to role."
            )
--- a/Show more
+++ b/Show more