From ac5118ee34c4bd149ac26d042e2ffe5292ee3459 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:28:51 +0200 Subject: [PATCH 01/28] test:Add load test --- cognee/tests/load_test.py | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cognee/tests/load_test.py diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py new file mode 100644 index 000000000..da9b74ab9 --- /dev/null +++ b/cognee/tests/load_test.py @@ -0,0 +1,61 @@ +import os +import pathlib +import asyncio +import time + +import cognee +from cognee.modules.search.types import SearchType +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + +async def helper_func(num_of_searches): + + start_time = time.time() + + await cognee.cognify() + + await asyncio.gather( + *[ + cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + for _ in range(num_of_searches) + ] + ) + + end_time = time.time() + + return end_time - start_time + +async def main(): + + file_path = os.path.join( + pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" + ) + + num_of_pdfs = 10 + num_of_reps = 5 + upper_boundary_minutes = 3 + average_minutes = 1.5 + + await asyncio.gather( + *[ + cognee.add(file_path, dataset_name=f"dataset_{i}") + for i in range(num_of_pdfs) + ] + ) + + recorded_times = await asyncio.gather( + *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + ) + + average_recorded_time = sum(recorded_times) / len(recorded_times) + + assert average_recorded_time <= average_minutes * 60 + + assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) + + return + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From c16459d236a6e07b9267d323387b2be217fd5b46 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:58:05 +0200 Subject: [PATCH 02/28] test: Add prune step to the test --- cognee/tests/load_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py index da9b74ab9..c44efad00 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/load_test.py @@ -9,7 +9,7 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def helper_func(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() @@ -37,6 +37,9 @@ async def main(): upper_boundary_minutes = 3 average_minutes = 1.5 + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await asyncio.gather( *[ cognee.add(file_path, dataset_name=f"dataset_{i}") @@ -45,7 +48,7 @@ async def main(): ) recorded_times = await asyncio.gather( - *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] ) average_recorded_time = sum(recorded_times) / len(recorded_times) @@ -54,8 +57,6 @@ async def main(): assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) - return - if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file From c5648e63375d9eb1520f5a007dda520f70c9c145 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 22 Oct 2025 09:22:11 +0200 Subject: [PATCH 03/28] test: Add load test. 
--- .github/workflows/e2e_tests.yml | 31 ++++++++++++++++++++- cognee/tests/{load_test.py => test_load.py} | 30 ++++++++++++-------- 2 files changed, 49 insertions(+), 12 deletions(-) rename cognee/tests/{load_test.py => test_load.py} (65%) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 9582a3f3b..5f66e71d2 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -330,4 +330,33 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py \ No newline at end of file + run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py + + test-load: + name: Test Load + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Load Test + env: + ENV: 'dev' + ENABLE_BACKEND_ACCESS_CONTROL: True + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_load.py \ No newline at end of file diff --git a/cognee/tests/load_test.py b/cognee/tests/test_load.py similarity index 65% rename from cognee/tests/load_test.py rename to cognee/tests/test_load.py index c44efad00..09e2db084 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/test_load.py @@ -9,8 +9,8 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def process_and_search(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() await cognee.cognify() @@ -26,26 +26,34 @@ async def process_and_search(num_of_searches): return end_time - start_time -async def main(): +async def main(): file_path = os.path.join( pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" ) + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 num_of_reps = 5 - upper_boundary_minutes = 3 - average_minutes = 1.5 + upper_boundary_minutes = 10 + average_minutes = 8 await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await asyncio.gather( - *[ - cognee.add(file_path, dataset_name=f"dataset_{i}") - for i in range(num_of_pdfs) - ] - ) + for i in range(num_of_pdfs): + await cognee.add(file_path, dataset_name=f"dataset_{i}") recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] @@ -59,4 +67,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) From 897fbd2f09abfc1c3c5cc30fc2fcf17ed549ae80 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 27 Oct 2025 15:42:09 +0100 Subject: [PATCH 04/28] load test now uses s3 bucket --- 
cognee/tests/test_load.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index 09e2db084..f8d007d28 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -17,7 +17,9 @@ async def process_and_search(num_of_searches): await asyncio.gather( *[ - cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + cognee.search( + query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION + ) for _ in range(num_of_searches) ] ) @@ -28,9 +30,6 @@ async def process_and_search(num_of_searches): async def main(): - file_path = os.path.join( - pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" - ) data_directory_path = str( pathlib.Path( os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") @@ -52,8 +51,8 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - for i in range(num_of_pdfs): - await cognee.add(file_path, dataset_name=f"dataset_{i}") + s3_input = "s3://cognee-load-test-s3-bucket" + await cognee.add(s3_input) recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] From 2b083dd0f110e44341d30a6228abb18591cfabac Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 09:27:33 +0100 Subject: [PATCH 05/28] small changes to load test --- cognee/tests/test_load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index f8d007d28..a09ce053d 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -48,15 +48,15 @@ async def main(): upper_boundary_minutes = 10 average_minutes = 8 - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) + recorded_times = [] + for _ in range(num_of_reps): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) - s3_input = "s3://cognee-load-test-s3-bucket" - await cognee.add(s3_input) + s3_input = "s3://cognee-test-load-s3-bucket" + await cognee.add(s3_input) - recorded_times = await asyncio.gather( - *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] - ) + recorded_times.append(await process_and_search(num_of_pdfs)) average_recorded_time = sum(recorded_times) / len(recorded_times) From fb7e74eaa8d8bdae50021e0a24e7e6bf6bfc2a09 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:28:09 +0100 Subject: [PATCH 06/28] refactor: Enable multi user mode by default if graph and vector db providers support it --- .env.template | 5 ++- cognee/context_global_variables.py | 43 ++++++++++++++++++- cognee/modules/search/methods/search.py | 6 +-- .../users/methods/get_authenticated_user.py | 3 +- .../relational_database_migration_example.py | 3 ++ logs/.gitkeep | 0 logs/README.md | 31 ------------- 7 files changed, 53 insertions(+), 38 deletions(-) delete mode 100644 logs/.gitkeep delete mode 100644 logs/README.md diff --git a/.env.template b/.env.template index 89ac06830..8e1bdd23f 100644 --- a/.env.template +++ b/.env.template @@ -169,8 +169,9 @@ REQUIRE_AUTHENTICATION=False # Vector: LanceDB # Graph: KuzuDB # -# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee user + dataset -ENABLE_BACKEND_ACCESS_CONTROL=False +# It enforces creation of databases per Cognee user + dataset. Does not work with some graph and database providers. 
+# Disable this mode when using unsupported graph/vector databases.
+ENABLE_BACKEND_ACCESS_CONTROL=True
 
 ################################################################################
 # ☁️ Cloud Sync Settings

diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py
index d52de4b4e..8ad855724 100644
--- a/cognee/context_global_variables.py
+++ b/cognee/context_global_variables.py
@@ -4,6 +4,8 @@
 from typing import Union
 from uuid import UUID
 from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user
@@ -14,11 +16,50 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)
 
+vector_dbs_with_multi_user_support = ["lancedb"]
+graph_dbs_with_multi_user_support = ["kuzu"]
+
 
 async def set_session_user_context_variable(user):
     session_user.set(user)
 
 
+def check_multi_user_support():
+    graph_db_config = get_graph_context_config()
+    vector_db_config = get_vectordb_context_config()
+    if (
+        graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support
+        and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support
+    ):
+        return True
+    else:
+        return False
+
+
+def check_backend_access_control_mode():
+    backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
+    if backend_access_control is None:
+        # If backend access control is not defined in environment variables,
+        # enable it by default if graph and vector DBs can support it, otherwise disable it
+        multi_user_support = check_multi_user_support()
+        if multi_user_support:
+            return "true"
+        else:
+            return "false"
+    elif backend_access_control.lower() == "true":
+        # If enabled, ensure that the current graph and vector DBs can support it
+        multi_user_support = check_multi_user_support()
+        if not multi_user_support:
+            raise EnvironmentError(
+                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+ ) + else: + return "true" + else: + # If explicitly disabled, return false + return "false" + + async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): """ If backend access control is enabled this function will ensure all datasets have their own databases, @@ -40,7 +81,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if not check_backend_access_control_mode() == "true": return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index aab004924..e3d7c220e 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -1,4 +1,3 @@ -import os import json import asyncio from uuid import UUID @@ -9,6 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry from cognee.context_global_variables import set_database_global_context_variables +from cognee.context_global_variables import check_backend_access_control_mode from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if check_backend_access_control_mode() == "true": search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if check_backend_access_control_mode() == "true": return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index d78215892..34d82586e 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -5,6 +5,7 @@ from ..models import User from ..get_fastapi_users import get_fastapi_users from .get_default_user import get_default_user from cognee.shared.logging_utils import get_logger +from cognee.context_global_variables import check_backend_access_control_mode logger = get_logger("get_authenticated_user") @@ -12,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true" + or check_backend_access_control_mode() == "true" ) fastapi_users = get_fastapi_users() diff --git a/examples/python/relational_database_migration_example.py b/examples/python/relational_database_migration_example.py index 7e87347bc..98482cb4b 100644 --- a/examples/python/relational_database_migration_example.py +++ b/examples/python/relational_database_migration_example.py @@ -31,6 +31,9 @@ from cognee.infrastructure.databases.vector.pgvector import ( async def main(): + # Disable backend access control to migrate relational data + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + # Clean all data 
stored in Cognee await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/logs/.gitkeep b/logs/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/logs/README.md b/logs/README.md deleted file mode 100644 index 96ef613b5..000000000 --- a/logs/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Logs Directory - -This directory contains the application logs for Cognee. - -## Log Files - -- Log files are named by date in the format `YYYY-MM-DD_HH-MM-SS.log` -- Logs are stored in plain text format with a consistent structure -- Each log entry includes: - - Timestamp (ISO format) - - Log level (padded to consistent width) - - Message - - Additional context (if any) - - Logger name (in square brackets) -- Exception tracebacks are included for error logs - -## Sample Log Entry - -``` -2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger] -``` - -## Retention Policy - -The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments. - -## Usage - -Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature. - -The logs directory structure is preserved in version control, but the log files themselves are gitignored. From 6a7660a7c10892657422307b90788d0d6f80b8ab Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:31:42 +0100 Subject: [PATCH 07/28] refactor: Return logs folder --- logs/.gitkeep | 0 logs/README.md | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 logs/.gitkeep create mode 100644 logs/README.md diff --git a/logs/.gitkeep b/logs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/logs/README.md b/logs/README.md new file mode 100644 index 000000000..96ef613b5 --- /dev/null +++ b/logs/README.md @@ -0,0 +1,31 @@ +# Logs Directory + +This directory contains the application logs for Cognee. + +## Log Files + +- Log files are named by date in the format `YYYY-MM-DD_HH-MM-SS.log` +- Logs are stored in plain text format with a consistent structure +- Each log entry includes: + - Timestamp (ISO format) + - Log level (padded to consistent width) + - Message + - Additional context (if any) + - Logger name (in square brackets) +- Exception tracebacks are included for error logs + +## Sample Log Entry + +``` +2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger] +``` + +## Retention Policy + +The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments. + +## Usage + +Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature. + +The logs directory structure is preserved in version control, but the log files themselves are gitignored. 
From 6572cf5cb9bcd7bc3906dc9149a22759069e79b2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:35:44 +0100 Subject: [PATCH 08/28] refactor: use boolean instead of string --- cognee/context_global_variables.py | 10 +++++----- cognee/modules/search/methods/search.py | 4 ++-- cognee/modules/users/methods/get_authenticated_user.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 8ad855724..b4b848192 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -43,9 +43,9 @@ def check_backend_access_control_mode(): # enable it by default if graph and vector DBs can support it, otherwise disable it multi_user_support = check_multi_user_support() if multi_user_support: - return "true" + return True else: - return "false" + return False elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it multi_user_support = check_multi_user_support() @@ -54,10 +54,10 @@ def check_backend_access_control_mode(): "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." ) else: - return "true" + return True else: # If explicitly disabled, return false - return "false" + return False async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): @@ -81,7 +81,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not check_backend_access_control_mode() == "true": + if not check_backend_access_control_mode(): return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index e3d7c220e..4a67093e8 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if check_backend_access_control_mode() == "true": + if check_backend_access_control_mode(): search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if check_backend_access_control_mode() == "true": + if check_backend_access_control_mode(): return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index 34d82586e..3cc16f3a8 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -13,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or check_backend_access_control_mode() == "true" + or check_backend_access_control_mode() ) fastapi_users = get_fastapi_users() From d1581e9ebab143930acf1cec404dda083fb06a9f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 17:36:56 +0100 Subject: [PATCH 09/28] refactor: disable permissions for code graph example --- examples/python/code_graph_example.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/examples/python/code_graph_example.py b/examples/python/code_graph_example.py index 431069050..1b476a2c3 100644 --- a/examples/python/code_graph_example.py +++ b/examples/python/code_graph_example.py @@ -1,5 +1,7 @@ import argparse import asyncio +import os + import cognee from cognee import SearchType from cognee.shared.logging_utils import setup_logging, ERROR @@ -8,6 +10,9 @@ from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline async def main(repo_path, include_docs): + # Disable permissions feature for this example + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + run_status = False async for run_status in run_code_graph_pipeline(repo_path, include_docs=include_docs): run_status = run_status From eec96e4f1fb30e692b6e448aa9b1e553c0fead98 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 19:14:53 +0100 Subject: [PATCH 10/28] refactor: fix search result for library test --- cognee/tests/test_library.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 81f81ee61..893b836c0 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -90,15 +90,17 @@ async def main(): ) search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?" + query_type=SearchType.GRAPH_COMPLETION, + query_text="What information do you contain?", + dataset_ids=[pipeline_run_obj.dataset_id], ) - assert "Mark" in search_results[0], ( + assert "Mark" in search_results[0]["search_result"][0], ( "Failed to update document, no mention of Mark in search results" ) - assert "Cindy" in search_results[0], ( + assert "Cindy" in search_results[0]["search_result"][0], ( "Failed to update document, no mention of Cindy in search results" ) - assert "Artificial intelligence" not in search_results[0], ( + assert "Artificial intelligence" not in search_results[0]["search_result"][0], ( "Failed to update document, Artificial intelligence still mentioned in search results" ) From ee0ecd52d8115396523c1a62b2c99b3178f5d182 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 16:25:34 +0100 Subject: [PATCH 11/28] refactor: Rewrite tests to work with multi-user mode by default --- .../users/test_conditional_authentication.py | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/cognee/tests/unit/modules/users/test_conditional_authentication.py b/cognee/tests/unit/modules/users/test_conditional_authentication.py index c4368d796..6568c3cb0 100644 --- a/cognee/tests/unit/modules/users/test_conditional_authentication.py +++ b/cognee/tests/unit/modules/users/test_conditional_authentication.py @@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration: # REQUIRE_AUTHENTICATION should be a boolean assert isinstance(REQUIRE_AUTHENTICATION, bool) - # Currently should be False (optional authentication) - assert not REQUIRE_AUTHENTICATION - class TestConditionalAuthenticationEnvironmentVariables: """Test environment variable handling.""" - def test_require_authentication_default_false(self): - """Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars.""" - with patch.dict(os.environ, {}, clear=True): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - module will see empty environment - from 
cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - importlib.invalidate_caches() - assert not REQUIRE_AUTHENTICATION - def test_require_authentication_true(self): """Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported.""" with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}): @@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables: assert REQUIRE_AUTHENTICATION - def test_require_authentication_false_explicit(self): - """Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported.""" - with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - module will see REQUIRE_AUTHENTICATION=false - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - assert not REQUIRE_AUTHENTICATION - - def test_require_authentication_case_insensitive(self): - """Test that environment variable parsing is case insensitive when imported.""" - test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"] - - for case in test_cases: - with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - expected = case.lower() == "true" - assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}" - - def test_current_require_authentication_value(self): - """Test that the current REQUIRE_AUTHENTICATION module value is as expected.""" - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - # The module-level variable should currently be False (set at import time) - assert isinstance(REQUIRE_AUTHENTICATION, bool) - assert not REQUIRE_AUTHENTICATION - class TestConditionalAuthenticationEdgeCases: """Test edge cases and error scenarios.""" From 9d8430cfb08e17467390ff53e57d330885c6c7d9 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 16:52:04 +0100 Subject: [PATCH 12/28] refactor: Update unit tests for require auth --- .../test_conditional_authentication_endpoints.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cognee/tests/unit/api/test_conditional_authentication_endpoints.py b/cognee/tests/unit/api/test_conditional_authentication_endpoints.py index 2eabee91a..6cc37ef38 100644 --- a/cognee/tests/unit/api/test_conditional_authentication_endpoints.py +++ b/cognee/tests/unit/api/test_conditional_authentication_endpoints.py @@ -1,3 +1,4 @@ +import os import pytest from unittest.mock import patch, AsyncMock, MagicMock from uuid import uuid4 @@ -5,8 +6,6 @@ from fastapi.testclient import TestClient from types import SimpleNamespace import importlib -from cognee.api.client import app - # Fixtures for reuse across test classes @pytest.fixture @@ -32,6 +31,10 @@ def mock_authenticated_user(): ) +# To turn off authentication we need to set the environment variable before importing the module +# Also both require_authentication and backend access control must be false +os.environ["REQUIRE_AUTHENTICATION"] = "false" 
+os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user") @@ -40,6 +43,8 @@ class TestConditionalAuthenticationEndpoints: @pytest.fixture def client(self): + from cognee.api.client import app + """Create a test client.""" return TestClient(app) @@ -133,6 +138,8 @@ class TestConditionalAuthenticationBehavior: @pytest.fixture def client(self): + from cognee.api.client import app + return TestClient(app) @pytest.mark.parametrize( @@ -209,6 +216,8 @@ class TestConditionalAuthenticationErrorHandling: @pytest.fixture def client(self): + from cognee.api.client import app + return TestClient(app) @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) @@ -232,7 +241,7 @@ class TestConditionalAuthenticationErrorHandling: # The exact error message may vary depending on the actual database connection # The important thing is that we get a 500 error when user creation fails - def test_current_environment_configuration(self): + def test_current_environment_configuration(self, client): """Test that current environment configuration is working properly.""" # This tests the actual module state without trying to change it from cognee.modules.users.methods.get_authenticated_user import ( From e061f34a28bf9cd27453ea6aac8abf215c7bde8f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 17:13:10 +0100 Subject: [PATCH 13/28] fix: Resolve issue with dataset names for example --- examples/python/feedback_enrichment_minimal_example.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/python/feedback_enrichment_minimal_example.py b/examples/python/feedback_enrichment_minimal_example.py index 11ef20830..8954bd5f6 100644 --- a/examples/python/feedback_enrichment_minimal_example.py +++ b/examples/python/feedback_enrichment_minimal_example.py @@ -67,7 +67,6 @@ async def run_feedback_enrichment_memify(last_n: int = 5): extraction_tasks=extraction_tasks, enrichment_tasks=enrichment_tasks, data=[{}], # A placeholder to prevent fetching the entire graph - dataset="feedback_enrichment_minimal", ) From 45bb3130c695260af9ccb00e756a0b4d22f0a85b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 17:40:00 +0100 Subject: [PATCH 14/28] fix: Use same dataset name accross cognee calls --- cognee/tests/test_feedback_enrichment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_feedback_enrichment.py b/cognee/tests/test_feedback_enrichment.py index 02d90db32..378cb0e45 100644 --- a/cognee/tests/test_feedback_enrichment.py +++ b/cognee/tests/test_feedback_enrichment.py @@ -133,7 +133,7 @@ async def main(): extraction_tasks=extraction_tasks, enrichment_tasks=enrichment_tasks, data=[{}], - dataset="feedback_enrichment_test_memify", + dataset=dataset_name, ) nodes_after, edges_after = await graph_engine.get_graph_data() From 1b483276b0077c57eefdc7e7aa93d58501cf7840 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 18:04:27 +0100 Subject: [PATCH 15/28] fix: disable backend access control for rel db test --- cognee/tests/test_relational_db_migration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cognee/tests/test_relational_db_migration.py b/cognee/tests/test_relational_db_migration.py index 2b69ce854..4557e9e2f 100644 --- a/cognee/tests/test_relational_db_migration.py +++ b/cognee/tests/test_relational_db_migration.py @@ -27,6 +27,9 @@ def normalize_node_name(node_name: str) -> str: async def setup_test_db(): + # Disable backend access control to migrate 
relational data + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) From 3c09433adead92f8a09093069a6d88de63c28409 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 13:57:12 +0100 Subject: [PATCH 16/28] fix: Resolve docling test --- cognee/tests/test_add_docling_document.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tests/test_add_docling_document.py b/cognee/tests/test_add_docling_document.py index 2c82af66f..c5aa4e9d1 100644 --- a/cognee/tests/test_add_docling_document.py +++ b/cognee/tests/test_add_docling_document.py @@ -39,12 +39,12 @@ async def main(): answer = await cognee.search("Do programmers change light bulbs?") assert len(answer) != 0 - lowercase_answer = answer[0].lower() + lowercase_answer = answer[0]["search_result"][0].lower() assert ("no" in lowercase_answer) or ("none" in lowercase_answer) answer = await cognee.search("What colours are there in the presentation table?") assert len(answer) != 0 - lowercase_answer = answer[0].lower() + lowercase_answer = answer[0]["search_result"][0].lower() assert ( ("red" in lowercase_answer) and ("blue" in lowercase_answer) From 00a1fe71d76ae62deac5832751366061f21bc96f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 14:33:07 +0100 Subject: [PATCH 17/28] fix: Use multi-user mode search --- examples/python/agentic_reasoning_procurement_example.py | 2 +- examples/python/memify_coding_agent_example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python/agentic_reasoning_procurement_example.py b/examples/python/agentic_reasoning_procurement_example.py index 5aa3caa70..4e9d2d7e4 100644 --- a/examples/python/agentic_reasoning_procurement_example.py +++ b/examples/python/agentic_reasoning_procurement_example.py @@ -168,7 +168,7 @@ async def run_procurement_example(): for q in questions: print(f"Question: \n{q}") results = await procurement_system.search_memory(q, search_categories=[category]) - top_answer = results[category][0] + top_answer = results[category][0]["search_result"][0] print(f"Answer: \n{top_answer.strip()}\n") research_notes[category].append({"question": q, "answer": top_answer}) diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 1fd3b1528..4a087ba61 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -89,7 +89,7 @@ async def main(): ) print("Coding rules created by memify:") - for coding_rule in coding_rules: + for coding_rule in coding_rules[0]["search_result"][0]: print("- " + coding_rule) # Visualize new graph with added memify context From 4c8b8211979fc12b6e46a911eba1c2e2610187d8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 14:55:52 +0100 Subject: [PATCH 18/28] fix: resolve test failing --- cognee/tests/test_search_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index e24abd0f5..bcc4529a9 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -146,7 +146,7 @@ async def main(): assert len(search_results) == 1, ( f"{name}: expected single-element list, got {len(search_results)}" ) - text = search_results[0] + text = search_results[0]["search_result"][0] assert isinstance(text, str), f"{name}: element should be a string" assert text.strip(), f"{name}: string should not be empty" assert 
"netherlands" in text.lower(), ( From f368a1a4d5d79a4485bc52ff5d9f16fc909942d2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 20:10:05 +0100 Subject: [PATCH 19/28] fix: set tests to not use multi-user mode --- .github/workflows/search_db_tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml index e3e46dd97..118c1c06c 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -84,6 +84,7 @@ jobs: GRAPH_DATABASE_PROVIDER: 'neo4j' VECTOR_DB_PROVIDER: 'lancedb' DB_PROVIDER: 'sqlite' + ENABLE_BACKEND_ACCESS_CONTROL: 'false' GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }} GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }} GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }} @@ -135,6 +136,7 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} GRAPH_DATABASE_PROVIDER: 'kuzu' VECTOR_DB_PROVIDER: 'pgvector' + ENABLE_BACKEND_ACCESS_CONTROL: 'false' DB_PROVIDER: 'postgres' DB_NAME: 'cognee_db' DB_HOST: '127.0.0.1' @@ -197,6 +199,7 @@ jobs: GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }} GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }} GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }} + ENABLE_BACKEND_ACCESS_CONTROL: 'false' DB_NAME: cognee_db DB_HOST: 127.0.0.1 DB_PORT: 5432 From 2ab2cffd07ed35a268362af48c2fb668674509f1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 3 Nov 2025 16:37:03 +0100 Subject: [PATCH 20/28] chore: update test_search_db to work with all graph providers --- cognee/tests/test_search_db.py | 8 +++++++- examples/python/simple_example.py | 8 -------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index bcc4529a9..ea3f0ea44 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -146,7 +146,13 @@ async def main(): assert len(search_results) == 1, ( f"{name}: expected single-element list, got {len(search_results)}" ) - text = search_results[0]["search_result"][0] + + from cognee.context_global_variables import check_backend_access_control_mode + + if check_backend_access_control_mode(): + text = search_results[0]["search_result"][0] + else: + text = search_results[0] assert isinstance(text, str), f"{name}: element should be a string" assert text.strip(), f"{name}: string should not be empty" assert "netherlands" in text.lower(), ( diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index c13e48f85..237a8295e 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -59,14 +59,6 @@ async def main(): for result_text in search_results: print(result_text) - # Example output: - # ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 
'computer science', 'description': 'The study of computation and information processing.'}) - # (...) - # It represents nodes and relationships in the knowledge graph: - # - The first element is the source node (e.g., 'natural language processing'). - # - The second element is the relationship between nodes (e.g., 'is_a_subfield_of'). - # - The third element is the target node (e.g., 'computer science'). - if __name__ == "__main__": logger = setup_logging(log_level=ERROR) From 4424bdc76471342119d59943c38d392dac9d72b0 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 17:06:51 +0100 Subject: [PATCH 21/28] test: fix path based on pr comment --- cognee/tests/test_load.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index a09ce053d..b38466bc7 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -30,17 +30,10 @@ async def process_and_search(num_of_searches): async def main(): - data_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") - ).resolve() - ) + data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") cognee.config.data_root_directory(data_directory_path) - cognee_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") - ).resolve() - ) + + cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 From eb8df45dab2cb7a07de7eb97570d364790f80080 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:10:19 +0100 Subject: [PATCH 22/28] test: increase file descriptor limit on workflow load test --- .github/workflows/e2e_tests.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index cf704c76a..79df3ff6b 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -463,6 +463,12 @@ jobs: with: python-version: '3.11.x' + - name: Set File Descriptor Limit + run: sudo prlimit --pid $$ --nofile=4096:4096 + + - name: Verify File Descriptor Limit + run: ulimit -n + - name: Dependencies already installed run: echo "Dependencies already installed in setup" @@ -478,4 +484,9 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + STORAGE_BACKEND: s3 + AWS_REGION: eu-west-1 + AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} run: uv run python ./cognee/tests/test_load.py \ No newline at end of file From a7d63df98c7ad21a40679c1b821acb30690c65d8 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:15:18 +0100 Subject: [PATCH 23/28] test: add extra aws dependency to load test --- .github/workflows/e2e_tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 79df3ff6b..0596f22d3 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -267,8 +267,6 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_edge_ingestion.py - - run_concurrent_subprocess_access_test: 
name: Concurrent Subprocess access test runs-on: ubuntu-latest @@ -450,7 +448,6 @@ jobs: DB_PASSWORD: cognee run: uv run python ./cognee/tests/test_conversation_history.py - test-load: name: Test Load runs-on: ubuntu-22.04 @@ -462,6 +459,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "aws" - name: Set File Descriptor Limit run: sudo prlimit --pid $$ --nofile=4096:4096 From c81d06d364d3b1f114fb2e7e81db856e7389386d Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:37:52 +0100 Subject: [PATCH 24/28] Update cognee/context_global_variables.py Co-authored-by: Pavel Zorin --- cognee/context_global_variables.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index b4b848192..6f3965441 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -41,11 +41,7 @@ def check_backend_access_control_mode(): if backend_access_control is None: # If backend access control is not defined in environment variables, # enable it by default if graph and vector DBs can support it, otherwise disable it - multi_user_support = check_multi_user_support() - if multi_user_support: - return True - else: - return False + return check_multi_user_support() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it multi_user_support = check_multi_user_support() From 53521c2068319d340c3f2b396dbbc7f3f9c80523 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:42:51 +0100 Subject: [PATCH 25/28] Update cognee/context_global_variables.py Co-authored-by: Pavel Zorin --- cognee/context_global_variables.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 6f3965441..3afbf6ff2 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -27,13 +27,10 @@ async def set_session_user_context_variable(user): def check_multi_user_support(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() - if ( + return ( graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support - ): - return True - else: - return False + ) def check_backend_access_control_mode(): From 46c509778f89d5bebbcbe5f7578159e45841ca2d Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 12:06:16 +0100 Subject: [PATCH 26/28] refactor: Rename access control functions --- cognee/context_global_variables.py | 17 +++++++---------- cognee/modules/search/methods/search.py | 6 +++--- .../users/methods/get_authenticated_user.py | 4 ++-- cognee/tests/test_search_db.py | 4 ++-- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 3afbf6ff2..f17c9187a 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -24,7 +24,7 @@ async def set_session_user_context_variable(user): session_user.set(user) -def check_multi_user_support(): +def multi_user_support_possible(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() return ( @@ -33,24 +33,21 @@ def check_multi_user_support(): ) -def 
check_backend_access_control_mode(): +def backend_access_control_enabled(): backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None) if backend_access_control is None: # If backend access control is not defined in environment variables, # enable it by default if graph and vector DBs can support it, otherwise disable it - return check_multi_user_support() + return multi_user_support_possible() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it - multi_user_support = check_multi_user_support() + multi_user_support = multi_user_support_possible() if not multi_user_support: raise EnvironmentError( "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." ) - else: - return True - else: - # If explicitly disabled, return false - return False + return True + return False async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): @@ -74,7 +71,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not check_backend_access_control_mode(): + if not backend_access_control_enabled(): return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 4a67093e8..5e465b239 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -8,7 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry from cognee.context_global_variables import set_database_global_context_variables -from cognee.context_global_variables import check_backend_access_control_mode +from cognee.context_global_variables import backend_access_control_enabled from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if check_backend_access_control_mode(): + if backend_access_control_enabled(): search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if check_backend_access_control_mode(): + if backend_access_control_enabled(): return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index 3cc16f3a8..d6d701737 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -5,7 +5,7 @@ from ..models import User from ..get_fastapi_users import get_fastapi_users from .get_default_user import get_default_user from cognee.shared.logging_utils import get_logger -from cognee.context_global_variables import check_backend_access_control_mode +from cognee.context_global_variables import backend_access_control_enabled logger = get_logger("get_authenticated_user") @@ -13,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( 
os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or check_backend_access_control_mode() + or backend_access_control_enabled() ) fastapi_users = get_fastapi_users() diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index ea3f0ea44..bd11dc62e 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -147,9 +147,9 @@ async def main(): f"{name}: expected single-element list, got {len(search_results)}" ) - from cognee.context_global_variables import check_backend_access_control_mode + from cognee.context_global_variables import backend_access_control_enabled - if check_backend_access_control_mode(): + if backend_access_control_enabled(): text = search_results[0]["search_result"][0] else: text = search_results[0] From c0e5ce04cedbf3bc4511e73ed2e2276e21c2f978 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 6 Nov 2025 14:13:55 +0100 Subject: [PATCH 27/28] Fix: fixes session history test for multiuser mode (#1746) ## Description Fixes failing session history test ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
---
 .../persist_sessions_in_knowledge_graph.py | 2 +-
 cognee/tasks/memify/cognify_session.py | 7 ++++---
 cognee/tests/test_conversation_history.py | 6 +++---
 .../modules/memify_tasks/test_cognify_session.py | 12 ++++++++----
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py
index c0ba0a4d9..92d64c156 100644
--- a/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py
+++ b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py
@@ -40,7 +40,7 @@ async def persist_sessions_in_knowledge_graph_pipeline(
     extraction_tasks = [Task(extract_user_sessions, session_ids=session_ids)]
 
     enrichment_tasks = [
-        Task(cognify_session),
+        Task(cognify_session, dataset_id=dataset_to_write[0].id),
     ]
 
     result = await memify(
diff --git a/cognee/tasks/memify/cognify_session.py b/cognee/tasks/memify/cognify_session.py
index 7c276169a..f53f9afb1 100644
--- a/cognee/tasks/memify/cognify_session.py
+++ b/cognee/tasks/memify/cognify_session.py
@@ -6,7 +6,7 @@ from cognee.shared.logging_utils import get_logger
 
 logger = get_logger("cognify_session")
 
 
-async def cognify_session(data):
+async def cognify_session(data, dataset_id=None):
     """
     Process and cognify session data into the knowledge graph.
 
@@ -16,6 +16,7 @@
 
     Args:
         data: Session string containing Question, Context, and Answer information.
+        dataset_id: Optional ID of the dataset the session is added to and cognified into.
 
     Raises:
         CogneeValidationError: If data is None or empty.
 
@@ -28,9 +29,9 @@
 
         logger.info("Processing session data for cognification")
 
-        await cognee.add(data, node_set=["user_sessions_from_cache"])
+        await cognee.add(data, dataset_id=dataset_id, node_set=["user_sessions_from_cache"])
         logger.debug("Session data added to cognee with node_set: user_sessions")
-        await cognee.cognify()
+        await cognee.cognify(datasets=[dataset_id])
 
         logger.info("Session data successfully cognified")
     except CogneeValidationError:
diff --git a/cognee/tests/test_conversation_history.py b/cognee/tests/test_conversation_history.py
index 6b5b737f1..783baf563 100644
--- a/cognee/tests/test_conversation_history.py
+++ b/cognee/tests/test_conversation_history.py
@@ -56,10 +56,10 @@ async def main():
         """DataCo is a data analytics company.
They help businesses make sense of their data.""" ) - await cognee.add(text_1, dataset_name) - await cognee.add(text_2, dataset_name) + await cognee.add(data=text_1, dataset_name=dataset_name) + await cognee.add(data=text_2, dataset_name=dataset_name) - await cognee.cognify([dataset_name]) + await cognee.cognify(datasets=[dataset_name]) user = await get_default_user() diff --git a/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py index c23640fbd..8c2448287 100644 --- a/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +++ b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py @@ -16,9 +16,11 @@ async def test_cognify_session_success(): patch("cognee.add", new_callable=AsyncMock) as mock_add, patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, ): - await cognify_session(session_data) + await cognify_session(session_data, dataset_id="123") - mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_add.assert_called_once_with( + session_data, dataset_id="123", node_set=["user_sessions_from_cache"] + ) mock_cognify.assert_called_once() @@ -101,7 +103,9 @@ async def test_cognify_session_with_special_characters(): patch("cognee.add", new_callable=AsyncMock) as mock_add, patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, ): - await cognify_session(session_data) + await cognify_session(session_data, dataset_id="123") - mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_add.assert_called_once_with( + session_data, dataset_id="123", node_set=["user_sessions_from_cache"] + ) mock_cognify.assert_called_once() From 5bc83968f81982ad90f37e41c205c90bb1b9b3d5 Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:22:48 +0100 Subject: [PATCH 28/28] feature: text chunker with overlap (#1732) ## Description - Implements `TextChunkerWithOverlap` with configurable `chunk_overlap_ratio` - Abstracts chunk_data generation via `get_chunk_data` callable (defaults to `chunk_by_paragraph`) - Parametrized tests verify `TextChunker` and `TextChunkerWithOverlap` (0% overlap) produce identical output for all edge cases. - Overlap-specific tests validate `TextChunkerWithOverlap` behavior ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
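## Usage Sketch

A minimal sketch of how the new chunker is constructed, assuming an already-loaded cognee `Document` instance; the `document` object and the `read_text` helper below are illustrative placeholders, not part of this PR. Only the constructor signature and the derived overlap sizes come from the implementation.

```python
from cognee.modules.chunking.text_chunker_with_overlap import TextChunkerWithOverlap


def read_text():
    # Placeholder text source; get_text is assumed to be a callable
    # yielding the document's text, as with the existing TextChunker.
    yield "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."


# `document` is assumed to be an existing cognee Document instance.
chunker = TextChunkerWithOverlap(
    document=document,
    get_text=read_text,
    max_chunk_size=512,
    chunk_overlap_ratio=0.2,  # chunk_overlap = int(512 * 0.2) = 102 units carried into the next chunk
)
# With chunk_overlap_ratio > 0 and no custom get_chunk_data, paragraphs are cut at
# int(0.5 * 0.2 * 512) = 51 units so whole paragraphs can fit inside the overlap window.
```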
---------
Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com>
---
 .../chunking/text_chunker_with_overlap.py | 124 +++++++
 .../modules/chunking/test_text_chunker.py | 248 ++++++++++++++
 .../test_text_chunker_with_overlap.py | 324 ++++++++++++++++++
 3 files changed, 696 insertions(+)
 create mode 100644 cognee/modules/chunking/text_chunker_with_overlap.py
 create mode 100644 cognee/tests/unit/modules/chunking/test_text_chunker.py
 create mode 100644 cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py

diff --git a/cognee/modules/chunking/text_chunker_with_overlap.py b/cognee/modules/chunking/text_chunker_with_overlap.py
new file mode 100644
index 000000000..4b9c23079
--- /dev/null
+++ b/cognee/modules/chunking/text_chunker_with_overlap.py
@@ -0,0 +1,124 @@
+from cognee.shared.logging_utils import get_logger
+from uuid import NAMESPACE_OID, uuid5
+
+from cognee.tasks.chunks import chunk_by_paragraph
+from cognee.modules.chunking.Chunker import Chunker
+from .models.DocumentChunk import DocumentChunk
+
+logger = get_logger()
+
+
+class TextChunkerWithOverlap(Chunker):
+    def __init__(
+        self,
+        document,
+        get_text: callable,
+        max_chunk_size: int,
+        chunk_overlap_ratio: float = 0.0,
+        get_chunk_data: callable = None,
+    ):
+        super().__init__(document, get_text, max_chunk_size)
+        self._accumulated_chunk_data = []
+        self._accumulated_size = 0
+        self.chunk_overlap_ratio = chunk_overlap_ratio
+        self.chunk_overlap = int(max_chunk_size * chunk_overlap_ratio)
+
+        if get_chunk_data is not None:
+            self.get_chunk_data = get_chunk_data
+        elif chunk_overlap_ratio > 0:
+            paragraph_max_size = int(0.5 * chunk_overlap_ratio * max_chunk_size)
+            self.get_chunk_data = lambda text: chunk_by_paragraph(
+                text, paragraph_max_size, batch_paragraphs=True
+            )
+        else:
+            self.get_chunk_data = lambda text: chunk_by_paragraph(
+                text, self.max_chunk_size, batch_paragraphs=True
+            )
+
+    def _accumulation_overflows(self, chunk_data):
+        """Check if adding chunk_data would exceed max_chunk_size."""
+        return self._accumulated_size + chunk_data["chunk_size"] > self.max_chunk_size
+
+    def _accumulate_chunk_data(self, chunk_data):
+        """Add chunk_data to the current accumulation."""
+        self._accumulated_chunk_data.append(chunk_data)
+        self._accumulated_size += chunk_data["chunk_size"]
+
+    def _clear_accumulation(self):
+        """Reset accumulation, keeping overlap chunk_data based on chunk_overlap_ratio."""
+        if self.chunk_overlap == 0:
+            self._accumulated_chunk_data = []
+            self._accumulated_size = 0
+            return
+
+        # Keep chunk_data from the end that fit in overlap
+        overlap_chunk_data = []
+        overlap_size = 0
+
+        for chunk_data in reversed(self._accumulated_chunk_data):
+            if overlap_size + chunk_data["chunk_size"] <= self.chunk_overlap:
+                overlap_chunk_data.insert(0, chunk_data)
+                overlap_size += chunk_data["chunk_size"]
+            else:
+                break
+
+        self._accumulated_chunk_data = overlap_chunk_data
+        self._accumulated_size = overlap_size
+
+    def _create_chunk(self, text, size, cut_type, chunk_id=None):
+        """Create a DocumentChunk with standard metadata."""
+        try:
+            return DocumentChunk(
+                id=chunk_id or uuid5(NAMESPACE_OID, f"{str(self.document.id)}-{self.chunk_index}"),
+                text=text,
+                chunk_size=size,
+                is_part_of=self.document,
+                chunk_index=self.chunk_index,
+                cut_type=cut_type,
+                contains=[],
+                metadata={"index_fields": ["text"]},
+            )
+        except Exception as e:
+            logger.error(e)
+            raise e
+
+    def _create_chunk_from_accumulation(self):
+        """Create a DocumentChunk from current accumulated chunk_data."""
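+        # The accumulated piece texts are joined with single spaces below; note
+        # that these joiner spaces are not counted in _accumulated_size.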
" ".join(chunk["text"] for chunk in self._accumulated_chunk_data) + return self._create_chunk( + text=chunk_text, + size=self._accumulated_size, + cut_type=self._accumulated_chunk_data[-1]["cut_type"], + ) + + def _emit_chunk(self, chunk_data): + """Emit a chunk when accumulation overflows.""" + if len(self._accumulated_chunk_data) > 0: + chunk = self._create_chunk_from_accumulation() + self._clear_accumulation() + self._accumulate_chunk_data(chunk_data) + else: + # Handle single chunk_data exceeding max_chunk_size + chunk = self._create_chunk( + text=chunk_data["text"], + size=chunk_data["chunk_size"], + cut_type=chunk_data["cut_type"], + chunk_id=chunk_data["chunk_id"], + ) + + self.chunk_index += 1 + return chunk + + async def read(self): + async for content_text in self.get_text(): + for chunk_data in self.get_chunk_data(content_text): + if not self._accumulation_overflows(chunk_data): + self._accumulate_chunk_data(chunk_data) + continue + + yield self._emit_chunk(chunk_data) + + if len(self._accumulated_chunk_data) == 0: + return + + yield self._create_chunk_from_accumulation() diff --git a/cognee/tests/unit/modules/chunking/test_text_chunker.py b/cognee/tests/unit/modules/chunking/test_text_chunker.py new file mode 100644 index 000000000..d535f74b0 --- /dev/null +++ b/cognee/tests/unit/modules/chunking/test_text_chunker.py @@ -0,0 +1,248 @@ +"""Unit tests for TextChunker and TextChunkerWithOverlap behavioral equivalence.""" + +import pytest +from uuid import uuid4 + +from cognee.modules.chunking.TextChunker import TextChunker +from cognee.modules.chunking.text_chunker_with_overlap import TextChunkerWithOverlap +from cognee.modules.data.processing.document_types import Document + + +@pytest.fixture(params=["TextChunker", "TextChunkerWithOverlap"]) +def chunker_class(request): + """Parametrize tests to run against both implementations.""" + return TextChunker if request.param == "TextChunker" else TextChunkerWithOverlap + + +@pytest.fixture +def make_text_generator(): + """Factory for async text generators.""" + + def _factory(*texts): + async def gen(): + for text in texts: + yield text + + return gen + + return _factory + + +async def collect_chunks(chunker): + """Consume async generator and return list of chunks.""" + chunks = [] + async for chunk in chunker.read(): + chunks.append(chunk) + return chunks + + +@pytest.mark.asyncio +async def test_empty_input_produces_no_chunks(chunker_class, make_text_generator): + """Empty input should yield no chunks.""" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator("") + chunker = chunker_class(document, get_text, max_chunk_size=512) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 0, "Empty input should produce no chunks" + + +@pytest.mark.asyncio +async def test_whitespace_only_input_emits_single_chunk(chunker_class, make_text_generator): + """Whitespace-only input should produce exactly one chunk with unchanged text.""" + whitespace_text = " \n\t \r\n " + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(whitespace_text) + chunker = chunker_class(document, get_text, max_chunk_size=512) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 1, "Whitespace-only input should produce exactly one chunk" + assert chunks[0].text == whitespace_text, "Chunk 
+    assert chunks[0].text == whitespace_text, "Chunk text should equal input (whitespace preserved)"
+    assert chunks[0].chunk_index == 0, "First chunk should have index 0"
+
+
+@pytest.mark.asyncio
+async def test_single_paragraph_below_limit_emits_one_chunk(chunker_class, make_text_generator):
+    """Single paragraph below limit should emit exactly one chunk."""
+    text = "This is a short paragraph."
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    get_text = make_text_generator(text)
+    chunker = chunker_class(document, get_text, max_chunk_size=512)
+    chunks = await collect_chunks(chunker)
+
+    assert len(chunks) == 1, "Single short paragraph should produce exactly one chunk"
+    assert chunks[0].text == text, "Chunk text should match input"
+    assert chunks[0].chunk_index == 0, "First chunk should have index 0"
+    assert chunks[0].chunk_size > 0, "Chunk should have positive size"
+
+
+@pytest.mark.asyncio
+async def test_oversized_paragraph_gets_emitted_as_a_single_chunk(
+    chunker_class, make_text_generator
+):
+    """Oversized paragraph from chunk_by_paragraph should be emitted as single chunk."""
+    text = ("A" * 1500) + ". Next sentence."
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    get_text = make_text_generator(text)
+    chunker = chunker_class(document, get_text, max_chunk_size=50)
+    chunks = await collect_chunks(chunker)
+
+    assert len(chunks) == 2, "Should produce 2 chunks (oversized paragraph + next sentence)"
+    assert chunks[0].chunk_size > 50, "First chunk should be oversized"
+    assert chunks[0].chunk_index == 0, "First chunk should have index 0"
+    assert chunks[1].chunk_index == 1, "Second chunk should have index 1"
+
+
+@pytest.mark.asyncio
+async def test_overflow_on_next_paragraph_emits_separate_chunk(chunker_class, make_text_generator):
+    """First paragraph near limit plus small paragraph should produce two separate chunks."""
+    first_para = " ".join(["word"] * 5)
+    second_para = "Short text."
+    text = first_para + " " + second_para
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    get_text = make_text_generator(text)
+    chunker = chunker_class(document, get_text, max_chunk_size=10)
+    chunks = await collect_chunks(chunker)
+
+    assert len(chunks) == 2, "Should produce 2 chunks due to overflow"
+    assert chunks[0].text.strip() == first_para, "First chunk should contain only first paragraph"
+    assert chunks[1].text.strip() == second_para, (
+        "Second chunk should contain only second paragraph"
+    )
+    assert chunks[0].chunk_index == 0, "First chunk should have index 0"
+    assert chunks[1].chunk_index == 1, "Second chunk should have index 1"
+
+
+@pytest.mark.asyncio
+async def test_small_paragraphs_batch_correctly(chunker_class, make_text_generator):
+    """Multiple small paragraphs should batch together with joiner spaces counted."""
+    paragraphs = [" ".join(["word"] * 12) for _ in range(40)]
+    text = " ".join(paragraphs)
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    get_text = make_text_generator(text)
+    chunker = chunker_class(document, get_text, max_chunk_size=49)
+    chunks = await collect_chunks(chunker)
+
+    assert len(chunks) == 20, (
+        "Should batch paragraphs (2 per chunk: 12 words × 2 tokens = 24, 24 + 1 joiner + 24 = 49)"
+    )
+    assert all(c.chunk_index == i for i, c in enumerate(chunks)), (
+        "Chunk indices should be sequential"
+    )
+    all_text = " ".join(chunk.text.strip() for chunk in chunks)
+    expected_text = " ".join(paragraphs)
+    assert all_text == expected_text, "All paragraph text should be preserved with correct spacing"
+
+
+@pytest.mark.asyncio
+async def test_alternating_large_and_small_paragraphs_dont_batch(
+    chunker_class, make_text_generator
+):
+    """Alternating near-max and small paragraphs should each become separate chunks."""
+    large1 = "word" * 15 + "."
+    small1 = "Short."
+    large2 = "word" * 15 + "."
+    small2 = "Tiny."
+    text = large1 + " " + small1 + " " + large2 + " " + small2
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    max_chunk_size = 10
+    get_text = make_text_generator(text)
+    chunker = chunker_class(document, get_text, max_chunk_size=max_chunk_size)
+    chunks = await collect_chunks(chunker)
+
+    assert len(chunks) == 4, "Should produce multiple chunks"
+    assert all(c.chunk_index == i for i, c in enumerate(chunks)), (
+        "Chunk indices should be sequential"
+    )
+    assert chunks[0].chunk_size > max_chunk_size, (
+        "First chunk should be oversized (large paragraph)"
+    )
+    assert chunks[1].chunk_size <= max_chunk_size, "Second chunk should be small (small paragraph)"
+    assert chunks[2].chunk_size > max_chunk_size, (
+        "Third chunk should be oversized (large paragraph)"
+    )
+    assert chunks[3].chunk_size <= max_chunk_size, "Fourth chunk should be small (small paragraph)"
+
+
+@pytest.mark.asyncio
+async def test_chunk_indices_and_ids_are_deterministic(chunker_class, make_text_generator):
+    """Running chunker twice on identical input should produce identical indices and IDs."""
+    sentence1 = "one " * 4 + ". "
+    sentence2 = "two " * 4 + ". "
+    sentence3 = "one " * 4 + ". "
+    sentence4 = "two " * 4 + ". "
" + text = sentence1 + sentence2 + sentence3 + sentence4 + doc_id = uuid4() + max_chunk_size = 20 + + document1 = Document( + id=doc_id, + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text1 = make_text_generator(text) + chunker1 = chunker_class(document1, get_text1, max_chunk_size=max_chunk_size) + chunks1 = await collect_chunks(chunker1) + + document2 = Document( + id=doc_id, + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text2 = make_text_generator(text) + chunker2 = chunker_class(document2, get_text2, max_chunk_size=max_chunk_size) + chunks2 = await collect_chunks(chunker2) + + assert len(chunks1) == 2, "Should produce exactly 2 chunks (4 sentences, 2 per chunk)" + assert len(chunks2) == 2, "Should produce exactly 2 chunks (4 sentences, 2 per chunk)" + assert [c.chunk_index for c in chunks1] == [0, 1], "First run indices should be [0, 1]" + assert [c.chunk_index for c in chunks2] == [0, 1], "Second run indices should be [0, 1]" + assert chunks1[0].id == chunks2[0].id, "First chunk ID should be deterministic" + assert chunks1[1].id == chunks2[1].id, "Second chunk ID should be deterministic" + assert chunks1[0].id != chunks1[1].id, "Chunk IDs should be unique within a run" diff --git a/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py b/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py new file mode 100644 index 000000000..9d7be6936 --- /dev/null +++ b/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py @@ -0,0 +1,324 @@ +"""Unit tests for TextChunkerWithOverlap overlap behavior.""" + +import sys +import pytest +from uuid import uuid4 +from unittest.mock import patch + +from cognee.modules.chunking.text_chunker_with_overlap import TextChunkerWithOverlap +from cognee.modules.data.processing.document_types import Document +from cognee.tasks.chunks import chunk_by_paragraph + + +@pytest.fixture +def make_text_generator(): + """Factory for async text generators.""" + + def _factory(*texts): + async def gen(): + for text in texts: + yield text + + return gen + + return _factory + + +@pytest.fixture +def make_controlled_chunk_data(): + """Factory for controlled chunk_data generators.""" + + def _factory(*sentences, chunk_size_per_sentence=10): + def _chunk_data(text): + return [ + { + "text": sentence, + "chunk_size": chunk_size_per_sentence, + "cut_type": "sentence", + "chunk_id": uuid4(), + } + for sentence in sentences + ] + + return _chunk_data + + return _factory + + +@pytest.mark.asyncio +async def test_half_overlap_preserves_content_across_chunks( + make_text_generator, make_controlled_chunk_data +): + """With 50% overlap, consecutive chunks should share half their content.""" + s1 = "one" + s2 = "two" + s3 = "three" + s4 = "four" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.5, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 3, "Should produce exactly 3 chunks (s1+s2, s2+s3, s3+s4)" + assert [c.chunk_index for c in chunks] == [0, 1, 2], "Chunk indices should be [0, 1, 2]" + 
assert "one" in chunks[0].text and "two" in chunks[0].text, "Chunk 0 should contain s1 and s2" + assert "two" in chunks[1].text and "three" in chunks[1].text, ( + "Chunk 1 should contain s2 (overlap) and s3" + ) + assert "three" in chunks[2].text and "four" in chunks[2].text, ( + "Chunk 2 should contain s3 (overlap) and s4" + ) + + +@pytest.mark.asyncio +async def test_zero_overlap_produces_no_duplicate_content( + make_text_generator, make_controlled_chunk_data +): + """With 0% overlap, no content should appear in multiple chunks.""" + s1 = "one" + s2 = "two" + s3 = "three" + s4 = "four" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.0, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks (s1+s2, s3+s4)" + assert "one" in chunks[0].text and "two" in chunks[0].text, ( + "First chunk should contain s1 and s2" + ) + assert "three" in chunks[1].text and "four" in chunks[1].text, ( + "Second chunk should contain s3 and s4" + ) + assert "two" not in chunks[1].text and "three" not in chunks[0].text, ( + "No overlap: end of chunk 0 should not appear in chunk 1" + ) + + +@pytest.mark.asyncio +async def test_small_overlap_ratio_creates_minimal_overlap( + make_text_generator, make_controlled_chunk_data +): + """With 25% overlap ratio, chunks should have minimal overlap.""" + s1 = "alpha" + s2 = "beta" + s3 = "gamma" + s4 = "delta" + s5 = "epsilon" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, s5, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=40, + chunk_overlap_ratio=0.25, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks" + assert [c.chunk_index for c in chunks] == [0, 1], "Chunk indices should be [0, 1]" + assert all(token in chunks[0].text for token in [s1, s2, s3, s4]), ( + "Chunk 0 should contain s1 through s4" + ) + assert s4 in chunks[1].text and s5 in chunks[1].text, ( + "Chunk 1 should contain overlap s4 and new content s5" + ) + + +@pytest.mark.asyncio +async def test_high_overlap_ratio_creates_significant_overlap( + make_text_generator, make_controlled_chunk_data +): + """With 75% overlap ratio, consecutive chunks should share most content.""" + s1 = "red" + s2 = "blue" + s3 = "green" + s4 = "yellow" + s5 = "purple" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, s5, chunk_size_per_sentence=5) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.75, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks 
+    assert [c.chunk_index for c in chunks] == [0, 1], "Chunk indices should be [0, 1]"
+    assert all(token in chunks[0].text for token in [s1, s2, s3, s4]), (
+        "Chunk 0 should contain s1, s2, s3, s4"
+    )
+    assert all(token in chunks[1].text for token in [s2, s3, s4, s5]), (
+        "Chunk 1 should contain s2, s3, s4 (overlap) and s5"
+    )
+
+
+@pytest.mark.asyncio
+async def test_single_chunk_no_dangling_overlap(make_text_generator, make_controlled_chunk_data):
+    """Text that fits in one chunk should produce exactly one chunk, no overlap artifact."""
+    s1 = "alpha"
+    s2 = "beta"
+    text = "dummy"
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+    get_text = make_text_generator(text)
+    get_chunk_data = make_controlled_chunk_data(s1, s2, chunk_size_per_sentence=10)
+    chunker = TextChunkerWithOverlap(
+        document,
+        get_text,
+        max_chunk_size=20,
+        chunk_overlap_ratio=0.5,
+        get_chunk_data=get_chunk_data,
+    )
+    chunks = [chunk async for chunk in chunker.read()]
+
+    assert len(chunks) == 1, (
+        "Should produce exactly 1 chunk when content fits within max_chunk_size"
+    )
+    assert chunks[0].chunk_index == 0, "Single chunk should have index 0"
+    assert "alpha" in chunks[0].text and "beta" in chunks[0].text, (
+        "Single chunk should contain all content"
+    )
+
+
+@pytest.mark.asyncio
+async def test_paragraph_chunking_with_overlap(make_text_generator):
+    """Test that chunk_by_paragraph integration produces 25% overlap between chunks."""
+
+    def mock_get_embedding_engine():
+        class MockEngine:
+            tokenizer = None
+
+        return MockEngine()
+
+    chunk_by_sentence_module = sys.modules.get("cognee.tasks.chunks.chunk_by_sentence")
+
+    max_chunk_size = 20
+    overlap_ratio = 0.25  # 5 token overlap
+    paragraph_max_size = int(0.5 * overlap_ratio * max_chunk_size)  # = 2
+
+    text = (
+        "A0 A1. A2 A3. A4 A5. A6 A7. A8 A9. "  # 10 tokens (0-9)
+        "B0 B1. B2 B3. B4 B5. B6 B7. B8 B9. "  # 10 tokens (10-19)
+        "C0 C1. C2 C3. C4 C5. C6 C7. C8 C9. "  # 10 tokens (20-29)
+        "D0 D1. D2 D3. D4 D5. D6 D7. D8 D9. "  # 10 tokens (30-39)
+        "E0 E1. E2 E3. E4 E5. E6 E7. E8 E9."  # 10 tokens (40-49)
+    )
+
+    document = Document(
+        id=uuid4(),
+        name="test_document",
+        raw_data_location="/test/path",
+        external_metadata=None,
+        mime_type="text/plain",
+    )
+
+    get_text = make_text_generator(text)
+
+    def get_chunk_data(text_input):
+        return chunk_by_paragraph(
+            text_input, max_chunk_size=paragraph_max_size, batch_paragraphs=True
+        )
+
+    with patch.object(
+        chunk_by_sentence_module, "get_embedding_engine", side_effect=mock_get_embedding_engine
+    ):
+        chunker = TextChunkerWithOverlap(
+            document,
+            get_text,
+            max_chunk_size=max_chunk_size,
+            chunk_overlap_ratio=overlap_ratio,
+            get_chunk_data=get_chunk_data,
+        )
+        chunks = [chunk async for chunk in chunker.read()]
+
+    assert len(chunks) == 3, f"Should produce exactly 3 chunks, got {len(chunks)}"
+
+    assert chunks[0].chunk_index == 0, "First chunk should have index 0"
+    assert chunks[1].chunk_index == 1, "Second chunk should have index 1"
+    assert chunks[2].chunk_index == 2, "Third chunk should have index 2"
+
+    assert "A0" in chunks[0].text, "Chunk 0 should start with A0"
+    assert "A9" in chunks[0].text, "Chunk 0 should contain A9"
+    assert "B0" in chunks[0].text, "Chunk 0 should contain B0"
+    assert "B9" in chunks[0].text, "Chunk 0 should contain up to B9 (20 tokens)"
+
+    assert "B" in chunks[1].text, "Chunk 1 should have overlap from B section"
+    assert "C" in chunks[1].text, "Chunk 1 should contain C section"
+    assert "D" in chunks[1].text, "Chunk 1 should contain D section"
+
+    assert "D" in chunks[2].text, "Chunk 2 should have overlap from D section"
+    assert "E0" in chunks[2].text, "Chunk 2 should contain E0"
+    assert "E9" in chunks[2].text, "Chunk 2 should end with E9"
+
+    chunk_0_end_words = chunks[0].text.split()[-4:]
+    chunk_1_words = chunks[1].text.split()
+    overlap_0_1 = any(word in chunk_1_words for word in chunk_0_end_words)
+    assert overlap_0_1, (
+        f"No overlap detected between chunks 0 and 1. "
+        f"Chunk 0 ends with: {chunk_0_end_words}, "
+        f"Chunk 1 starts with: {chunk_1_words[:6]}"
+    )
+
+    chunk_1_end_words = chunks[1].text.split()[-4:]
+    chunk_2_words = chunks[2].text.split()
+    overlap_1_2 = any(word in chunk_2_words for word in chunk_1_end_words)
+    assert overlap_1_2, (
+        f"No overlap detected between chunks 1 and 2. "
+        f"Chunk 1 ends with: {chunk_1_end_words}, "
+        f"Chunk 2 starts with: {chunk_2_words[:6]}"
+    )