From 6636fe8afdb39b39f1981fe7f1c65c4e087203a1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 18:03:17 +0200 Subject: [PATCH 01/61] refactor: Add maximum document batch size for document processing --- .../modules/pipelines/operations/run_tasks.py | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 62d4972ad..4a86a5807 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -37,6 +37,8 @@ from ..tasks.task import Task logger = get_logger("run_tasks(tasks: [Task], data)") +# TODO: See if this parameter should be configurable as input for run_tasks itself +DOCUMENT_BATCH_SIZE = 10 def override_run_tasks(new_gen): @@ -266,24 +268,29 @@ async def run_tasks( if incremental_loading: data = await resolve_data_directories(data) - # Create async tasks per data item that will run the pipeline for the data item - data_item_tasks = [ - asyncio.create_task( - _run_tasks_data_item( - data_item, - dataset, - tasks, - pipeline_name, - pipeline_id, - pipeline_run_id, - context, - user, - incremental_loading, + # Create and gather batches of async tasks of data items that will run the pipeline for the data item + results = [] + for start in range(0, len(data), DOCUMENT_BATCH_SIZE): + document_batch = data[start : start + DOCUMENT_BATCH_SIZE] + + data_item_tasks = [ + asyncio.create_task( + _run_tasks_data_item( + data_item, + dataset, + tasks, + pipeline_name, + pipeline_id, + pipeline_run_id, + context, + user, + incremental_loading, + ) ) - ) - for data_item in data - ] - results = await asyncio.gather(*data_item_tasks) + for data_item in document_batch + ] + + results.extend(await asyncio.gather(*data_item_tasks)) # Remove skipped data items from results results = [result for result in results if result] From 9206d8536b89d4292c0286a35985665ce6f133d1 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 6 Oct 2025 17:45:22 +0200 Subject: [PATCH 02/61] initial changes, still need to work on this. 
commit so I can checkout to diff branch --- .github/workflows/examples_tests.yml | 58 ++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 4eb9e184f..406420351 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -85,8 +85,8 @@ jobs: run: uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py - test-dynamic-steps-metrics: - name: Run Dynamic Steps Example + test-multiple-examples: + name: Run Multiple Example Scripts runs-on: ubuntu-22.04 steps: - name: Check out repository @@ -97,7 +97,7 @@ jobs: with: python-version: '3.11.x' - - name: Run Dynamic Steps Tests + - name: Run Dynamic Steps Example env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -110,6 +110,58 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./examples/python/dynamic_steps_example.py + - name: Run Temporal Example + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./examples/python/temporal_example.py + + - name: Run Ontology Demo Example + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./examples/python/ontology_demo_example.py + + - name: Run Temporal Example + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./examples/python/temporal_example.py + + - name: Run Agentic Reasoning Example + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./examples/python/agentic_reasoning_procurement_example.py + test-memify: name: Run Memify Example runs-on: ubuntu-22.04 From 2932a627bbc674d0a4929b7be82b0e150ec8df50 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 9 Oct 2025 09:45:26 +0200 Subject: [PATCH 03/61] test: Potential fix for soft deletion test --- .github/workflows/test_different_operating_systems.yml | 
7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index 6eb5744f3..e784c9ca3 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -193,6 +193,13 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Path setup + if: ${{ matrix.os }} == 'windows-latest' + shell: bash + run: | + PATH=$(printf '%s\n' "$PATH" | grep -vi '/git/usr/bin' | paste -sd: -) + export PATH + - name: Run Soft Deletion Tests env: ENV: 'dev' From d1d8e334716d81fe6e8b1f1b185743197f27d79e Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 9 Oct 2025 10:48:21 +0200 Subject: [PATCH 04/61] test: Fix windows tests. First try of potential fixes. --- cognee/tests/test_library.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index c5e6cc64b..fe1a0bdfa 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -82,6 +82,11 @@ async def main(): data_root_directory = get_storage_config()["data_root_directory"] assert not os.path.isdir(data_root_directory), "Local data files are not deleted" + from cognee.infrastructure.databases.relational import get_relational_engine + + get_relational_engine().get_session().close() + await get_relational_engine().engine.dispose() + # Assert relational, vector and graph databases have been cleaned properly await cognee.prune.prune_system(metadata=True) @@ -89,7 +94,7 @@ async def main(): collection_names = await connection.table_names() assert len(collection_names) == 0, "LanceDB vector database is not empty" - from cognee.infrastructure.databases.relational import get_relational_engine + db_path = get_relational_engine().db_path dir_path = os.path.dirname(db_path) From ee96d8f940f7248dbc9a6b6cecbccd4c6f7fc24b Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 9 Oct 2025 11:00:54 +0200 Subject: [PATCH 05/61] chore: fix formatting --- cognee/tests/test_library.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index fe1a0bdfa..2933c77ba 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -94,8 +94,6 @@ async def main(): collection_names = await connection.table_names() assert len(collection_names) == 0, "LanceDB vector database is not empty" - - db_path = get_relational_engine().db_path dir_path = os.path.dirname(db_path) file_path = os.path.basename(db_path) From a44ab88519a784ef22ca9d133200124373db8dc7 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 9 Oct 2025 17:31:27 +0200 Subject: [PATCH 06/61] test: try calling gc to fix windows issue --- .../databases/relational/sqlalchemy/SqlAlchemyAdapter.py | 4 +++- cognee/tests/test_library.py | 7 ++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 88d2abc7e..4908295ca 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -507,7 +507,9 @@ class SQLAlchemyAdapter: if self.engine.dialect.name == "sqlite": await self.engine.dispose(close=True) # Wait for the database connections to close and release the file (Windows) - await asyncio.sleep(2) 
+ import gc + gc.collect() + # await asyncio.sleep(2) db_directory = path.dirname(self.db_path) file_name = path.basename(self.db_path) file_storage = get_file_storage(db_directory) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 2933c77ba..c5e6cc64b 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -82,11 +82,6 @@ async def main(): data_root_directory = get_storage_config()["data_root_directory"] assert not os.path.isdir(data_root_directory), "Local data files are not deleted" - from cognee.infrastructure.databases.relational import get_relational_engine - - get_relational_engine().get_session().close() - await get_relational_engine().engine.dispose() - # Assert relational, vector and graph databases have been cleaned properly await cognee.prune.prune_system(metadata=True) @@ -94,6 +89,8 @@ async def main(): collection_names = await connection.table_names() assert len(collection_names) == 0, "LanceDB vector database is not empty" + from cognee.infrastructure.databases.relational import get_relational_engine + db_path = get_relational_engine().db_path dir_path = os.path.dirname(db_path) file_path = os.path.basename(db_path) From a7a2631d53ba274511072e312745ecdabf9f8765 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 9 Oct 2025 17:50:20 +0200 Subject: [PATCH 07/61] chore: format --- .../databases/relational/sqlalchemy/SqlAlchemyAdapter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 4908295ca..36ba90db5 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -508,6 +508,7 @@ class SQLAlchemyAdapter: await self.engine.dispose(close=True) # Wait for the database connections to close and release the file (Windows) import gc + gc.collect() # await asyncio.sleep(2) db_directory = path.dirname(self.db_path) From abfcbc69d61ec8a71ed83a8dd32894f5e99d8248 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 10 Oct 2025 15:36:36 +0200 Subject: [PATCH 08/61] refactor: Have embedding calls run in async gather --- cognee/api/v1/cognify/cognify.py | 6 ++--- cognee/tasks/storage/index_data_points.py | 33 +++++++++++++---------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 1292d243a..6a9f68443 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 10}, + task_config={"batch_size": 30}, ), # Generate knowledge graphs from the document chunks. 
Task( summarize_text, - task_config={"batch_size": 10}, + task_config={"batch_size": 30}, ), - Task(add_data_points, task_config={"batch_size": 10}), + Task(add_data_points, task_config={"batch_size": 100}), ] return default_tasks diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py index 362412657..ebc4640d6 100644 --- a/cognee/tasks/storage/index_data_points.py +++ b/cognee/tasks/storage/index_data_points.py @@ -1,6 +1,6 @@ -from cognee.shared.logging_utils import get_logger +import asyncio -from cognee.infrastructure.databases.exceptions import EmbeddingException +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.engine import DataPoint @@ -33,18 +33,23 @@ async def index_data_points(data_points: list[DataPoint]): indexed_data_point.metadata["index_fields"] = [field_name] index_points[index_name].append(indexed_data_point) - for index_name_and_field, indexable_points in index_points.items(): - first_occurence = index_name_and_field.index("_") - index_name = index_name_and_field[:first_occurence] - field_name = index_name_and_field[first_occurence + 1 :] - try: - # In case the amount of indexable points is too large we need to send them in batches - batch_size = vector_engine.embedding_engine.get_batch_size() - for i in range(0, len(indexable_points), batch_size): - batch = indexable_points[i : i + batch_size] - await vector_engine.index_data_points(index_name, field_name, batch) - except EmbeddingException as e: - logger.warning(f"Failed to index data points for {index_name}.{field_name}: {e}") + tasks: list[asyncio.Task] = [] + batch_size = vector_engine.embedding_engine.get_batch_size() + + for index_name_and_field, points in index_points.items(): + first = index_name_and_field.index("_") + index_name = index_name_and_field[:first] + field_name = index_name_and_field[first + 1 :] + + # Split in the usual “range step batch_size” manner + for i in range(0, len(points), batch_size): + batch = points[i : i + batch_size] + tasks.append( + asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch)) + ) + + # Fire them all and wait until every task is done. + await asyncio.gather(*tasks) return data_points From 757d745b5d262975c05f5fe3bb3f410f5c3d72b7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 10 Oct 2025 17:12:09 +0200 Subject: [PATCH 09/61] refactor: Optimize cognification speed --- cognee/api/v1/cognify/cognify.py | 4 ++-- .../databases/vector/embeddings/config.py | 4 ++-- cognee/tasks/storage/index_graph_edges.py | 15 +++++++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 6a9f68443..30afb269a 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -269,11 +269,11 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 30}, + task_config={"batch_size": 100}, ), # Generate knowledge graphs from the document chunks. 
Task( summarize_text, - task_config={"batch_size": 30}, + task_config={"batch_size": 100}, ), Task(add_data_points, task_config={"batch_size": 100}), ] diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 24f724151..dcb55f4a4 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -26,9 +26,9 @@ class EmbeddingConfig(BaseSettings): def model_post_init(self, __context) -> None: # If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models if not self.embedding_batch_size and self.embedding_provider.lower() == "openai": - self.embedding_batch_size = 2048 + self.embedding_batch_size = 30 elif not self.embedding_batch_size: - self.embedding_batch_size = 100 + self.embedding_batch_size = 10 def to_dict(self) -> dict: """ diff --git a/cognee/tasks/storage/index_graph_edges.py b/cognee/tasks/storage/index_graph_edges.py index b7bf7a2b9..4fa8cfc75 100644 --- a/cognee/tasks/storage/index_graph_edges.py +++ b/cognee/tasks/storage/index_graph_edges.py @@ -1,3 +1,5 @@ +import asyncio + from cognee.modules.engine.utils.generate_edge_id import generate_edge_id from cognee.shared.logging_utils import get_logger from collections import Counter @@ -76,15 +78,20 @@ async def index_graph_edges( indexed_data_point.metadata["index_fields"] = [field_name] index_points[index_name].append(indexed_data_point) + # Get maximum batch size for embedding model + batch_size = vector_engine.embedding_engine.get_batch_size() + tasks: list[asyncio.Task] = [] + for index_name, indexable_points in index_points.items(): index_name, field_name = index_name.split(".") - # Get maximum batch size for embedding model - batch_size = vector_engine.embedding_engine.get_batch_size() - # We save the data in batches of {batch_size} to not put a lot of pressure on the database + # Create embedding tasks to run in parallel later for start in range(0, len(indexable_points), batch_size): batch = indexable_points[start : start + batch_size] - await vector_engine.index_data_points(index_name, field_name, batch) + tasks.append(vector_engine.index_data_points(index_name, field_name, batch)) + + # Start all embedding tasks and wait for completion + await asyncio.gather(*tasks) return None From 13d1133680a241a9423b57d760c2319c20b80670 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 10 Oct 2025 17:14:10 +0200 Subject: [PATCH 10/61] chore: Change comments --- cognee/tasks/storage/index_data_points.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py index ebc4640d6..902789c80 100644 --- a/cognee/tasks/storage/index_data_points.py +++ b/cognee/tasks/storage/index_data_points.py @@ -41,14 +41,14 @@ async def index_data_points(data_points: list[DataPoint]): index_name = index_name_and_field[:first] field_name = index_name_and_field[first + 1 :] - # Split in the usual “range step batch_size” manner + # Create embedding requests per batch to run in parallel later for i in range(0, len(points), batch_size): batch = points[i : i + batch_size] tasks.append( asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch)) ) - # Fire them all and wait until every task is done. 
+ # Run all embedding requests in parallel await asyncio.gather(*tasks) return data_points From ecb285e36613a22d1ad7338b5aa13ade9ff21a9b Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 12 Oct 2025 13:46:12 +0200 Subject: [PATCH 11/61] added formatting --- cognee/modules/pipelines/operations/run_tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 2a5bf81a8..2e0055384 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -91,7 +91,6 @@ async def run_tasks( if incremental_loading: data = await resolve_data_directories(data) - # Create and gather batches of async tasks of data items that will run the pipeline for the data item results = [] for start in range(0, len(data), DOCUMENT_BATCH_SIZE): From ef5965224ac49df72a21e56d9b0537df73ef6a37 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 13 Oct 2025 12:44:55 +0200 Subject: [PATCH 12/61] fix: Revert changes made to sql alchemy adapter for lib test --- .../databases/relational/sqlalchemy/SqlAlchemyAdapter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 36ba90db5..88d2abc7e 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -507,10 +507,7 @@ class SQLAlchemyAdapter: if self.engine.dialect.name == "sqlite": await self.engine.dispose(close=True) # Wait for the database connections to close and release the file (Windows) - import gc - - gc.collect() - # await asyncio.sleep(2) + await asyncio.sleep(2) db_directory = path.dirname(self.db_path) file_name = path.basename(self.db_path) file_storage = get_file_storage(db_directory) From f81aeff0096664a7556d451056f813e4994ab2d2 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 13 Oct 2025 12:50:26 +0200 Subject: [PATCH 13/61] Revert "fix: Revert changes made to sql alchemy adapter for lib test" This reverts commit ef5965224ac49df72a21e56d9b0537df73ef6a37. 
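Patches 01 and 08-10 above lean on the same pattern: slice a list of work items into fixed-size batches and await each batch with asyncio.gather so that concurrency stays bounded. A minimal standalone sketch of that pattern follows; BATCH_SIZE, run_in_batches, and _demo_worker are illustrative names, not cognee APIs.

import asyncio
from typing import Awaitable, Callable, List, TypeVar

T = TypeVar("T")
R = TypeVar("R")

BATCH_SIZE = 10  # illustrative stand-in for DOCUMENT_BATCH_SIZE / the embedding batch size


async def run_in_batches(
    items: List[T],
    worker: Callable[[T], Awaitable[R]],
    batch_size: int = BATCH_SIZE,
) -> List[R]:
    """Run worker over items with at most batch_size calls in flight per batch."""
    results: List[R] = []
    for start in range(0, len(items), batch_size):
        batch = items[start : start + batch_size]
        # Each batch is awaited in full before the next one starts, which caps
        # concurrency without needing a semaphore.
        results.extend(await asyncio.gather(*(worker(item) for item in batch)))
    return results


async def _demo_worker(n: int) -> int:
    await asyncio.sleep(0.01)  # stand-in for a per-item pipeline run or embedding request
    return n * n


if __name__ == "__main__":
    print(asyncio.run(run_in_batches(list(range(25)), _demo_worker)))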
--- .../databases/relational/sqlalchemy/SqlAlchemyAdapter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 88d2abc7e..36ba90db5 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -507,7 +507,10 @@ class SQLAlchemyAdapter: if self.engine.dialect.name == "sqlite": await self.engine.dispose(close=True) # Wait for the database connections to close and release the file (Windows) - await asyncio.sleep(2) + import gc + + gc.collect() + # await asyncio.sleep(2) db_directory = path.dirname(self.db_path) file_name = path.basename(self.db_path) file_storage = get_file_storage(db_directory) From 74ce78ddfe1542b470bade83c75828a70efdf4d4 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 13 Oct 2025 12:52:05 +0200 Subject: [PATCH 14/61] fix: Revert changes to sql alchemy for lib test --- .../databases/relational/sqlalchemy/SqlAlchemyAdapter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 93252ab6e..380ce9917 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -507,10 +507,7 @@ class SQLAlchemyAdapter: if self.engine.dialect.name == "sqlite": await self.engine.dispose(close=True) # Wait for the database connections to close and release the file (Windows) - import gc - - gc.collect() - # await asyncio.sleep(2) + await asyncio.sleep(2) db_directory = path.dirname(self.db_path) file_name = path.basename(self.db_path) file_storage = get_file_storage(db_directory) From 832243034f291d63c70eeeb43828ccdbc69d7bc0 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 13 Oct 2025 16:21:19 +0200 Subject: [PATCH 15/61] test: small change in soft delete test --- .github/workflows/test_different_operating_systems.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index e784c9ca3..00e387ac4 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -197,7 +197,7 @@ jobs: if: ${{ matrix.os }} == 'windows-latest' shell: bash run: | - PATH=$(printf '%s\n' "$PATH" | grep -vi '/git/usr/bin' | paste -sd: -) + PATH=$(printf '%s' "$PATH" | tr ':' $'\n' | grep -vi '/git/usr/bin' | paste -sd: -) export PATH - name: Run Soft Deletion Tests From eb631a23ad6eeaba9c1111b598a6f4f955cd6c86 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 14 Oct 2025 13:57:41 +0200 Subject: [PATCH 16/61] refactor: set default numbers that are more reasonable --- cognee/api/v1/cognify/cognify.py | 6 +++--- cognee/infrastructure/databases/vector/embeddings/config.py | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 30afb269a..898c35518 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, 
config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 100}, + task_config={"batch_size": 20}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": 100}, + task_config={"batch_size": 20}, ), - Task(add_data_points, task_config={"batch_size": 100}), + Task(add_data_points, task_config={"batch_size": 20}), ] return default_tasks diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index dcb55f4a4..314adbd99 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -24,11 +24,10 @@ class EmbeddingConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") def model_post_init(self, __context) -> None: - # If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models if not self.embedding_batch_size and self.embedding_provider.lower() == "openai": - self.embedding_batch_size = 30 + self.embedding_batch_size = 1024 elif not self.embedding_batch_size: - self.embedding_batch_size = 10 + self.embedding_batch_size = 100 def to_dict(self) -> dict: """ From 84a23756f5c77ef3c7e0c78c4aff122416249341 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 14 Oct 2025 14:25:38 +0200 Subject: [PATCH 17/61] fix: Change chunk_size ot batch_size for temporal task --- cognee/api/v1/cognify/cognify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 898c35518..2c87dbc4b 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -311,7 +311,7 @@ async def get_temporal_tasks( max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), - Task(extract_events_and_timestamps, task_config={"chunk_size": 10}), + Task(extract_events_and_timestamps, task_config={"batch_size": 10}), Task(extract_knowledge_graph_from_events), Task(add_data_points, task_config={"batch_size": 10}), ] From 98daadbb0461ae99935032bde96d8c056f874050 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 14 Oct 2025 20:29:55 +0200 Subject: [PATCH 18/61] refactor: Add tenacity retry mechanism --- .../embeddings/LiteLLMEmbeddingEngine.py | 18 ++++++++++++++++-- .../embeddings/OllamaEmbeddingEngine.py | 19 ++++++++++++++++--- poetry.lock | 2 +- pyproject.toml | 3 ++- uv.lock | 4 +++- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index d68941d25..2a71d674d 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -1,8 +1,17 @@ import asyncio +import logging + from cognee.shared.logging_utils import get_logger from typing import List, Optional import numpy as np import math +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) import litellm import os from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine @@ -76,8 +85,13 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine): enable_mocking = str(enable_mocking).lower() self.mock = enable_mocking in ("true", "1", "yes") - @embedding_sleep_and_retry_async() - 
@embedding_rate_limit_async + @retry( + stop=stop_after_delay(180), + wait=wait_exponential_jitter(1, 180), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def embed_text(self, text: List[str]) -> List[List[float]]: """ Embed a list of text strings into vector representations. diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py index e79ba3f6a..b8ee9c7df 100644 --- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py @@ -3,8 +3,16 @@ from cognee.shared.logging_utils import get_logger import aiohttp from typing import List, Optional import os - +import litellm +import logging import aiohttp.http_exceptions +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.llm.tokenizer.HuggingFace import ( @@ -69,7 +77,6 @@ class OllamaEmbeddingEngine(EmbeddingEngine): enable_mocking = str(enable_mocking).lower() self.mock = enable_mocking in ("true", "1", "yes") - @embedding_rate_limit_async async def embed_text(self, text: List[str]) -> List[List[float]]: """ Generate embedding vectors for a list of text prompts. @@ -92,7 +99,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine): embeddings = await asyncio.gather(*[self._get_embedding(prompt) for prompt in text]) return embeddings - @embedding_sleep_and_retry_async() + @retry( + stop=stop_after_delay(180), + wait=wait_exponential_jitter(1, 180), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def _get_embedding(self, prompt: str) -> List[float]: """ Internal method to call the Ollama embeddings endpoint for a single prompt. diff --git a/poetry.lock b/poetry.lock index 551295733..ffc5ec575 100644 --- a/poetry.lock +++ b/poetry.lock @@ -12738,4 +12738,4 @@ posthog = ["posthog"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "38353807b06e5c06caaa107979529937b978204f0f405c6b38cee283f4a49d3c" +content-hash = "d8cd8a8db46416e0c844ff90df5bd64551ebf9a0c338fbb2023a61008ff5941d" diff --git a/pyproject.toml b/pyproject.toml index 3df57e1f5..7ac2915d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,8 @@ dependencies = [ "networkx>=3.4.2,<4", "uvicorn>=0.34.0,<1.0.0", "gunicorn>=20.1.0,<24", - "websockets>=15.0.1,<16.0.0" + "websockets>=15.0.1,<16.0.0", + "tenacity>=9.0.0", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 570da9289..5c06b96be 100644 --- a/uv.lock +++ b/uv.lock @@ -856,7 +856,7 @@ wheels = [ [[package]] name = "cognee" -version = "0.3.4" +version = "0.3.5" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, @@ -892,6 +892,7 @@ dependencies = [ { name = "rdflib" }, { name = "sqlalchemy" }, { name = "structlog" }, + { name = "tenacity" }, { name = "tiktoken" }, { name = "typing-extensions" }, { name = "uvicorn" }, @@ -1086,6 +1087,7 @@ requires-dist = [ { name = "sentry-sdk", extras = ["fastapi"], marker = "extra == 'monitoring'", specifier = ">=2.9.0,<3" }, { name = "sqlalchemy", specifier = ">=2.0.39,<3.0.0" }, { name = "structlog", specifier = ">=25.2.0,<26" }, + { name = "tenacity", specifier = ">=9.0.0" }, { name = "tiktoken", specifier = ">=0.8.0,<1.0.0" }, { name = "transformers", marker = "extra == 'codegraph'", specifier = ">=4.46.3,<5" }, { name = "transformers", marker = "extra == 'huggingface'", specifier = ">=4.46.3,<5" }, From 1b28f137431d30c940568406fab1678db9276c28 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 13:32:17 +0200 Subject: [PATCH 19/61] refactor: Optimize Cognee speed --- cognee/api/v1/cognify/cognify.py | 6 +++--- .../embeddings/FastembedEmbeddingEngine.py | 20 +++++++++++++++++-- .../embeddings/LiteLLMEmbeddingEngine.py | 11 ++-------- .../embeddings/OllamaEmbeddingEngine.py | 4 ++-- .../databases/vector/embeddings/config.py | 4 ++-- 5 files changed, 27 insertions(+), 18 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 9215c9369..3032bd4e8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 20}, + task_config={"batch_size": 100}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": 20}, + task_config={"batch_size": 100}, ), - Task(add_data_points, task_config={"batch_size": 20}), + Task(add_data_points, task_config={"batch_size": 100}), ] return default_tasks diff --git a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py index e34ab5d9d..c2acd516e 100644 --- a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py @@ -1,8 +1,17 @@ -from cognee.shared.logging_utils import get_logger +import os +import logging from typing import List, Optional from fastembed import TextEmbedding import litellm -import os +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) + +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.exceptions import EmbeddingException from cognee.infrastructure.llm.tokenizer.TikToken import ( @@ -57,6 +66,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine): enable_mocking = str(enable_mocking).lower() self.mock = enable_mocking in ("true", "1", "yes") + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def embed_text(self, text: List[str]) -> List[List[float]]: """ Embed the given text into numerical vectors. 
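The retry policy that patches 18-19 attach to the embedding engines can be exercised in isolation. A hedged sketch follows, assuming tenacity is installed; FatalConfigError, FlakyUpstreamError, and embed_with_retries are illustrative names standing in for the real litellm exceptions and the embed_text call.

import asyncio
import logging
import random

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logger = logging.getLogger("embedding_retry_sketch")


class FatalConfigError(Exception):
    """Errors retrying cannot fix, analogous to litellm.exceptions.NotFoundError above."""


class FlakyUpstreamError(Exception):
    """Transient failures worth retrying, e.g. rate limits or timeouts."""


@retry(
    stop=stop_after_delay(128),  # give up after roughly 128 seconds in total
    wait=wait_exponential_jitter(2, 128),  # exponential backoff from 2s, capped at 128s, with jitter
    retry=retry_if_not_exception_type(FatalConfigError),  # never retry unrecoverable errors
    before_sleep=before_sleep_log(logger, logging.DEBUG),  # log every backoff
    reraise=True,  # re-raise the last exception instead of tenacity's RetryError
)
async def embed_with_retries(text: str) -> list[float]:
    if random.random() < 0.5:
        raise FlakyUpstreamError("simulated transient embedding failure")
    return [0.0] * 8  # stand-in for a real embedding vector


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    print(asyncio.run(embed_with_retries("hello")))

With reraise=True, callers still see the original exception type once the time budget is exhausted, so existing error handling around the embedding call does not need to change.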
diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index 302950f66..03ce86bee 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -16,9 +16,6 @@ import litellm import os from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.exceptions import EmbeddingException -from cognee.infrastructure.llm.tokenizer.Gemini import ( - GeminiTokenizer, -) from cognee.infrastructure.llm.tokenizer.HuggingFace import ( HuggingFaceTokenizer, ) @@ -28,10 +25,6 @@ from cognee.infrastructure.llm.tokenizer.Mistral import ( from cognee.infrastructure.llm.tokenizer.TikToken import ( TikTokenTokenizer, ) -from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import ( - embedding_rate_limit_async, - embedding_sleep_and_retry_async, -) litellm.set_verbose = False logger = get_logger("LiteLLMEmbeddingEngine") @@ -86,8 +79,8 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine): self.mock = enable_mocking in ("true", "1", "yes") @retry( - stop=stop_after_delay(180), - wait=wait_exponential_jitter(1, 180), + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py index b8ee9c7df..2882b679a 100644 --- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py @@ -100,8 +100,8 @@ class OllamaEmbeddingEngine(EmbeddingEngine): return embeddings @retry( - stop=stop_after_delay(180), - wait=wait_exponential_jitter(1, 180), + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 314adbd99..56cd79678 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -25,9 +25,9 @@ class EmbeddingConfig(BaseSettings): def model_post_init(self, __context) -> None: if not self.embedding_batch_size and self.embedding_provider.lower() == "openai": - self.embedding_batch_size = 1024 + self.embedding_batch_size = 36 elif not self.embedding_batch_size: - self.embedding_batch_size = 100 + self.embedding_batch_size = 36 def to_dict(self) -> dict: """ From fc4440da8c7b7cdfd4087f34c40ac90cc86bb839 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 14:43:21 +0200 Subject: [PATCH 20/61] refactor: update env template --- .env.template | 5 ++--- .../loaders/external/advanced_pdf_loader.py | 10 ++-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/.env.template b/.env.template index 7fd3ba9e8..3137636d3 100644 --- a/.env.template +++ b/.env.template @@ -28,11 +28,10 @@ EMBEDDING_ENDPOINT="" EMBEDDING_API_VERSION="" EMBEDDING_DIMENSIONS=3072 EMBEDDING_MAX_TOKENS=8191 +EMBEDDING_BATCH_SIZE=36 # If embedding key is not 
provided same key set for LLM_API_KEY will be used #EMBEDDING_API_KEY="your_api_key" -# Note: OpenAI support up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch, -# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models -#EMBEDDING_BATCH_SIZE=2048 + # If using BAML structured output these env variables will be used BAML_LLM_PROVIDER=openai diff --git a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py index 7bab8cac6..6d1412b77 100644 --- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py +++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py @@ -14,14 +14,6 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader logger = get_logger(__name__) -try: - from unstructured.partition.pdf import partition_pdf -except ImportError as e: - logger.info( - "unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead." - ) - raise ImportError from e - @dataclass class _PageBuffer: @@ -88,6 +80,8 @@ class AdvancedPdfLoader(LoaderInterface): **kwargs, } # Use partition to extract elements + from unstructured.partition.pdf import partition_pdf + elements = partition_pdf(**partition_kwargs) # Process elements into text content From 8692cd13381204a98100fb296bf971ad14ea7a32 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:03:17 +0100 Subject: [PATCH 21/61] feat: add count_nodes and count_edges methods to GraphDBInterface --- .../infrastructure/databases/graph/graph_db_interface.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 65afdf275..abfdff784 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -159,6 +159,14 @@ class GraphDBInterface(ABC): - get_connections """ + @abstractmethod + async def count_nodes(self) -> int: + raise NotImplementedError + + @abstractmethod + async def count_edges(self) -> int: + raise NotImplementedError + @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: """ From 5663c3fe3ab80f0eee7adb3576af4b579a1d8306 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 17:38:18 +0200 Subject: [PATCH 22/61] refactor: add batch size param to temporal graphs --- cognee/api/v1/cognify/cognify.py | 34 ++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 3032bd4e8..d29d8c939 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -44,6 +44,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, + batch_size: int = None, config: Config = None, vector_db_config: dict = None, graph_db_config: dict = None, @@ -105,6 +106,7 @@ async def cognify( Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Default limits: ~512-8192 tokens depending on models. Smaller chunks = more granular but potentially fragmented knowledge. + batch_size: Number of chunks to be processed in a single batch in Cognify tasks. vector_db_config: Custom vector database configuration for embeddings storage. 
graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. @@ -209,10 +211,18 @@ async def cognify( } if temporal_cognify: - tasks = await get_temporal_tasks(user, chunker, chunk_size) + tasks = await get_temporal_tasks( + user=user, chunker=chunker, chunk_size=chunk_size, batch_size=batch_size + ) else: tasks = await get_default_tasks( - user, graph_model, chunker, chunk_size, config, custom_prompt + user=user, + graph_model=graph_model, + chunker=chunker, + chunk_size=chunk_size, + config=config, + custom_prompt=custom_prompt, + batch_size=batch_size, ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -238,6 +248,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's chunk_size: int = None, config: Config = None, custom_prompt: Optional[str] = None, + batch_size: int = 100, ) -> list[Task]: if config is None: ontology_config = get_ontology_env_config() @@ -256,6 +267,9 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} } + if batch_size is None: + batch_size = 100 + default_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), @@ -269,20 +283,20 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 100}, + task_config={"batch_size": batch_size}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": 100}, + task_config={"batch_size": batch_size}, ), - Task(add_data_points, task_config={"batch_size": 100}), + Task(add_data_points, task_config={"batch_size": batch_size}), ] return default_tasks async def get_temporal_tasks( - user: User = None, chunker=TextChunker, chunk_size: int = None + user: User = None, chunker=TextChunker, chunk_size: int = None, batch_size: int = 10 ) -> list[Task]: """ Builds and returns a list of temporal processing tasks to be executed in sequence. @@ -299,10 +313,14 @@ async def get_temporal_tasks( user (User, optional): The user requesting task execution, used for permission checks. chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. + batch_size (int, optional): Number of chunks to process in a single batch in Cognify Returns: list[Task]: A list of Task objects representing the temporal processing pipeline. 
""" + if batch_size is None: + batch_size = 10 + temporal_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), @@ -311,9 +329,9 @@ async def get_temporal_tasks( max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), - Task(extract_events_and_timestamps, task_config={"batch_size": 10}), + Task(extract_events_and_timestamps, task_config={"batch_size": batch_size}), Task(extract_knowledge_graph_from_events), - Task(add_data_points, task_config={"batch_size": 10}), + Task(add_data_points, task_config={"batch_size": batch_size}), ] return temporal_tasks From f3ec1801025eb5cc1c2dc899a8aa3eca02ae4165 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:39:25 +0100 Subject: [PATCH 23/61] Implement count_edges and count_methods for Kuzu --- .../databases/graph/kuzu/adapter.py | 16 ++++++++++ cognee/tests/test_kuzu.py | 29 ++++++++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 7b772097f..a31726c9a 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,6 +185,22 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") + async def count_edges(self) -> int: + query = """ + MATCH ()-[r]->() + RETURN COUNT(r); + """ + query_result = await self.query(query) + return query_result[0][0] + + async def count_nodes(self) -> int: + query = """ + MATCH (n) + RETURN COUNT(n); + """ + query_result = await self.query(query) + return query_result[0][0] + async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: """ Execute a Kuzu query asynchronously with automatic reconnection. 
diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index 8749e42d0..e39edd06a 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -47,10 +47,31 @@ async def main(): pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" ) + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + await cognee.add([explanation_file_path_quantum], dataset_name) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, ( + "Kuzu graph database should be empty before cognify" + ) + await cognee.cognify([dataset_name]) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count != 0 and nodes_count != 0, "Kuzu graph database should not be empty" + from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -114,11 +135,11 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - from cognee.infrastructure.databases.graph import get_graph_engine - graph_engine = await get_graph_engine() - nodes, edges = await graph_engine.get_graph_data() - assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty" + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail From 9367fa5d03f42e3a1feb4d7d0de61cd1bb547fd0 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:39:48 +0100 Subject: [PATCH 24/61] Prior to search, check if knowledge graph is empty --- cognee/api/v1/search/search.py | 12 +++++++++++- cognee/modules/data/exceptions/__init__.py | 1 + cognee/modules/data/exceptions/exceptions.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 0a9e76e96..32035e612 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -1,13 +1,14 @@ from uuid import UUID from typing import Union, Optional, List, Type +from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.users.models import User from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets -from cognee.modules.data.exceptions import DatasetNotFoundError +from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError async def search( @@ -175,6 +176,15 @@ async def search( if not datasets: raise DatasetNotFoundError(message="No datasets found.") + graph_engine = await get_graph_engine() + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + if nodes_count == 0 or edges_count == 0: + raise SearchOnEmptyGraphError( + message="Knowledge graph is empty, please ensure data is added and cognified." 
+ ) + filtered_search_results = await search_function( query_text=query_text, query_type=query_type, diff --git a/cognee/modules/data/exceptions/__init__.py b/cognee/modules/data/exceptions/__init__.py index 54af81070..ba943634d 100644 --- a/cognee/modules/data/exceptions/__init__.py +++ b/cognee/modules/data/exceptions/__init__.py @@ -9,4 +9,5 @@ from .exceptions import ( UnauthorizedDataAccessError, DatasetNotFoundError, DatasetTypeError, + SearchOnEmptyGraphError, ) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index ac3b68e64..c2921750a 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -35,6 +35,16 @@ class DatasetNotFoundError(CogneeValidationError): super().__init__(message, name, status_code) +class SearchOnEmptyGraphError(CogneeValidationError): + def __init__( + self, + message: str = "Knowledge graph is empty, please ensure data is added and cognified.", + name: str = "SearchOnEmptyGraphError", + status_code=status.HTTP_400_BAD_REQUEST, + ): + super().__init__(message, name, status_code) + + class DatasetTypeError(CogneeValidationError): def __init__( self, From ea4a93efb172a82754a342084aa95393a0f11759 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:57:53 +0100 Subject: [PATCH 25/61] Implement count_nodes and count_edges methods for Neo4j --- .../databases/graph/neo4j_driver/adapter.py | 16 +++++++++++ cognee/tests/test_neo4j.py | 27 +++++++++++++++---- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 520295ed2..a61ab6f0b 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,6 +87,22 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield session + async def count_edges(self) -> int: + query = """ + MATCH ()-[r]->() + RETURN COUNT(r) as total_edges; + """ + query_result = await self.query(query) + return query_result[0]["total_edges"] + + async def count_nodes(self) -> int: + query = """ + MATCH (n) + RETURN COUNT(n) as total_nodes; + """ + query_result = await self.query(query) + return query_result[0]["total_nodes"] + @deadlock_retry() async def query( self, diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index c74b4ab65..11f6156bd 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -35,6 +35,15 @@ async def main(): explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Graph has to be empty" + await cognee.add([explanation_file_path_nlp], dataset_name) explanation_file_path_quantum = os.path.join( @@ -43,8 +52,18 @@ async def main(): await cognee.add([explanation_file_path_quantum], dataset_name) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Graph has to be empty before cognify" + await cognee.cognify([dataset_name]) + edges_count = await 
graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count != 0 and nodes_count != 0, "Graph shouldn't be empty" + from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -117,11 +136,9 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - from cognee.infrastructure.databases.graph import get_graph_engine - - graph_engine = await get_graph_engine() - nodes, edges = await graph_engine.get_graph_data() - assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty" + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + assert nodes_count == 0 and edges_count == 0, "Neo4j graph database is not empty" if __name__ == "__main__": From 96496f38ed1e4ce2dd63190c9cbf6a16338fbeb0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 18:08:18 +0200 Subject: [PATCH 26/61] refactor: Switch to using tenacity for rate limiting --- .../llm/anthropic/adapter.py | 28 ++++++---- .../litellm_instructor/llm/gemini/adapter.py | 22 ++++++-- .../llm/generic_llm_api/adapter.py | 22 ++++++-- .../litellm_instructor/llm/mistral/adapter.py | 54 ++++++------------- .../litellm_instructor/llm/ollama/adapter.py | 41 +++++++++++--- .../litellm_instructor/llm/openai/adapter.py | 53 +++++++++++++----- 6 files changed, 142 insertions(+), 78 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index 2d88a8271..bf19d6e86 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -1,19 +1,24 @@ +import logging from typing import Type from pydantic import BaseModel +import litellm import instructor +from cognee.shared.logging_utils import get_logger +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) -from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - sleep_and_retry_async, -) - -from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.config import get_llm_config +logger = get_logger() + class AnthropicAdapter(LLMInterface): """ @@ -35,8 +40,13 @@ class AnthropicAdapter(LLMInterface): self.model = model self.max_completion_tokens = max_completion_tokens - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 
510d29ce8..1187e0cad 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -12,11 +12,18 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - sleep_and_retry_async, +import logging +from cognee.shared.logging_utils import get_logger +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, ) +logger = get_logger() + class GeminiAdapter(LLMInterface): """ @@ -58,8 +65,13 @@ class GeminiAdapter(LLMInterface): self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON) - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py index 917599d4d..8bbbaa2cc 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py @@ -12,11 +12,18 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - sleep_and_retry_async, +import logging +from cognee.shared.logging_utils import get_logger +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, ) +logger = get_logger() + class GenericAPIAdapter(LLMInterface): """ @@ -58,8 +65,13 @@ class GenericAPIAdapter(LLMInterface): self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON) - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py index c4e51b70b..78a3cbff5 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py @@ -1,20 +1,23 @@ import litellm import 
instructor from pydantic import BaseModel -from typing import Type, Optional -from litellm import acompletion, JSONSchemaValidationError +from typing import Type +from litellm import JSONSchemaValidationError from cognee.shared.logging_utils import get_logger from cognee.modules.observability.get_observe import get_observe -from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.config import get_llm_config -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - sleep_and_retry_async, + +import logging +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, ) logger = get_logger() @@ -47,8 +50,13 @@ class MistralAdapter(LLMInterface): api_key=get_llm_config().llm_api_key, ) - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: @@ -99,31 +107,3 @@ class MistralAdapter(LLMInterface): logger.error(f"Schema validation failed: {str(e)}") logger.debug(f"Raw response: {e.raw_response}") raise ValueError(f"Response failed schema validation: {str(e)}") - - def show_prompt(self, text_input: str, system_prompt: str) -> str: - """ - Format and display the prompt for a user query. - - Parameters: - ----------- - - text_input (str): Input text from the user to be included in the prompt. - - system_prompt (str): The system prompt that will be shown alongside the user input. - - Returns: - -------- - - str: The formatted prompt string combining system prompt and user input. - """ - if not text_input: - text_input = "No user input provided." 
- if not system_prompt: - raise MissingSystemPromptPathError() - - system_prompt = LLMGateway.read_query_prompt(system_prompt) - - formatted_prompt = ( - f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n""" - if system_prompt - else None - ) - - return formatted_prompt diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py index 314cb79d8..9c3d185aa 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py @@ -1,4 +1,6 @@ import base64 +import litellm +import logging import instructor from typing import Type from openai import OpenAI @@ -7,11 +9,17 @@ from pydantic import BaseModel from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - sleep_and_retry_async, -) from cognee.infrastructure.files.utils.open_data_file import open_data_file +from cognee.shared.logging_utils import get_logger +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) + +logger = get_logger() class OllamaAPIAdapter(LLMInterface): @@ -47,8 +55,13 @@ class OllamaAPIAdapter(LLMInterface): OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON ) - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: @@ -90,7 +103,13 @@ class OllamaAPIAdapter(LLMInterface): return response - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def create_transcript(self, input_file: str) -> str: """ Generate an audio transcript from a user query. @@ -123,7 +142,13 @@ class OllamaAPIAdapter(LLMInterface): return transcription.text - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def transcribe_image(self, input_file: str) -> str: """ Transcribe content from an image using base64 encoding. 
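The same five-argument tenacity policy now appears on every adapter method above. As a reading aid only, here is a minimal, self-contained sketch of that configuration outside cognee; FatalConfigError, TransientProviderError and call_provider are made-up names standing in for the provider-specific non-retryable exception and the decorated adapter method:

import asyncio
import logging
import random

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("retry_sketch")


class FatalConfigError(Exception):
    """Stand-in for a permanent failure (the role litellm.exceptions.NotFoundError plays above)."""


class TransientProviderError(Exception):
    """Stand-in for a transient failure such as a rate-limit or overload response."""


@retry(
    stop=stop_after_delay(128),                            # give up after ~128 s of total retrying
    wait=wait_exponential_jitter(2, 128),                  # exponential backoff with jitter, 2 s initial, 128 s cap
    retry=retry_if_not_exception_type(FatalConfigError),   # retry everything except permanent failures
    before_sleep=before_sleep_log(logger, logging.DEBUG),  # log each backoff at DEBUG level
    reraise=True,                                          # surface the last exception once the policy is exhausted
)
async def call_provider(prompt: str) -> str:
    # Hypothetical provider call that fails transiently about half the time.
    if random.random() < 0.5:
        raise TransientProviderError("429: slow down")
    return f"structured output for: {prompt}"


if __name__ == "__main__":
    print(asyncio.run(call_provider("ping")))

Attempts back off exponentially with jitter between 2 and 128 seconds, retrying stops once roughly 128 seconds have elapsed in total, and reraise=True means the caller sees the original exception rather than tenacity's RetryError.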
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 527f64d75..8877c2bdf 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -7,6 +7,15 @@ from openai import ContentFilterFinishReasonError from litellm.exceptions import ContentPolicyViolationError from instructor.core import InstructorRetryException +import logging +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) + from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) @@ -14,19 +23,13 @@ from cognee.infrastructure.llm.exceptions import ( ContentPolicyFilterError, ) from cognee.infrastructure.files.utils.open_data_file import open_data_file -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( - rate_limit_async, - rate_limit_sync, - sleep_and_retry_async, - sleep_and_retry_sync, -) from cognee.modules.observability.get_observe import get_observe from cognee.shared.logging_utils import get_logger -observe = get_observe() - logger = get_logger() +observe = get_observe() + class OpenAIAdapter(LLMInterface): """ @@ -97,8 +100,13 @@ class OpenAIAdapter(LLMInterface): self.fallback_endpoint = fallback_endpoint @observe(as_type="generation") - @sleep_and_retry_async() - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: @@ -186,8 +194,13 @@ class OpenAIAdapter(LLMInterface): ) from error @observe - @sleep_and_retry_sync() - @rate_limit_sync + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) def create_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> BaseModel: @@ -231,7 +244,13 @@ class OpenAIAdapter(LLMInterface): max_retries=self.MAX_RETRIES, ) - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def create_transcript(self, input): """ Generate an audio transcript from a user query. @@ -263,7 +282,13 @@ class OpenAIAdapter(LLMInterface): return transcription - @rate_limit_async + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def transcribe_image(self, input) -> BaseModel: """ Generate a transcription of an image from a user query. 
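Worth noting about the patch above: tenacity's retry decorator wraps coroutine functions and plain functions alike, which is why the OpenAI adapter can put the identical decorator on the async acreate_structured_output and the synchronous create_structured_output. Since the arguments are repeated verbatim in every adapter, one possible consolidation (purely hypothetical, not part of this patch) is a small factory; llm_retry and HypotheticalNotFoundError are invented names:

import logging

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logger = logging.getLogger("llm_retry")


class HypotheticalNotFoundError(Exception):
    """Placeholder for the non-retryable provider error (litellm's NotFoundError in the patch)."""


def llm_retry(non_retryable=HypotheticalNotFoundError):
    """Build the shared retry policy once instead of repeating the five arguments on every method."""
    return retry(
        stop=stop_after_delay(128),
        wait=wait_exponential_jitter(2, 128),
        retry=retry_if_not_exception_type(non_retryable),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )


@llm_retry()
async def acreate_structured_output(prompt: str) -> dict:
    # async call path; body omitted in this sketch
    return {"prompt": prompt}


@llm_retry()
def create_structured_output(prompt: str) -> dict:
    # synchronous call path reuses the exact same policy
    return {"prompt": prompt}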
From dede5fa6fdc5c42e6ad36826c72f8c62d91eacae Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:09:13 +0100 Subject: [PATCH 27/61] add unit tests for empty graph check on search --- cognee/tests/unit/api/test_search.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 cognee/tests/unit/api/test_search.py diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py new file mode 100644 index 000000000..aff9e5d38 --- /dev/null +++ b/cognee/tests/unit/api/test_search.py @@ -0,0 +1,23 @@ +import pytest +import cognee +from cognee.modules.data.exceptions import SearchOnEmptyGraphError + + +@pytest.mark.asyncio +async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await cognee.add("Sample input") + with pytest.raises(SearchOnEmptyGraphError): + await cognee.search("Sample query") + + +async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await cognee.add("Sample input") + await cognee.cognify() + try: + await cognee.search("Sample query") + except SearchOnEmptyGraphError: + pytest.fail("Should not raise SearchOnEmptyGraphError when data was added and cognified") From 9e38a30c4945e1d5f3596550bd32ab26463cca03 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:20:45 +0100 Subject: [PATCH 28/61] refactor: keep only count_nodes --- cognee/api/v1/search/search.py | 3 +-- .../databases/graph/graph_db_interface.py | 4 ---- .../infrastructure/databases/graph/kuzu/adapter.py | 8 -------- .../databases/graph/neo4j_driver/adapter.py | 8 -------- cognee/tests/test_kuzu.py | 14 ++++---------- cognee/tests/test_neo4j.py | 13 ++++--------- examples/python/dynamic_steps_example.py | 2 +- 7 files changed, 10 insertions(+), 42 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 32035e612..880a57b99 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -177,10 +177,9 @@ async def search( raise DatasetNotFoundError(message="No datasets found.") graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - if nodes_count == 0 or edges_count == 0: + if nodes_count == 0: raise SearchOnEmptyGraphError( message="Knowledge graph is empty, please ensure data is added and cognified." 
) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index abfdff784..a4542cefe 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -163,10 +163,6 @@ class GraphDBInterface(ABC): async def count_nodes(self) -> int: raise NotImplementedError - @abstractmethod - async def count_edges(self) -> int: - raise NotImplementedError - @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: """ diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index a31726c9a..04c163efa 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,14 +185,6 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") - async def count_edges(self) -> int: - query = """ - MATCH ()-[r]->() - RETURN COUNT(r); - """ - query_result = await self.query(query) - return query_result[0][0] - async def count_nodes(self) -> int: query = """ MATCH (n) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index a61ab6f0b..ac19069f4 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,14 +87,6 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield session - async def count_edges(self) -> int: - query = """ - MATCH ()-[r]->() - RETURN COUNT(r) as total_edges; - """ - query_result = await self.query(query) - return query_result[0]["total_edges"] - async def count_nodes(self) -> int: query = """ MATCH (n) diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index e39edd06a..c07a51104 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -51,26 +51,21 @@ async def main(): graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + assert nodes_count == 0, "Kuzu graph database is not empty" await cognee.add([explanation_file_path_quantum], dataset_name) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, ( - "Kuzu graph database should be empty before cognify" - ) + assert nodes_count == 0, "Kuzu graph database should be empty before cognify" await cognee.cognify([dataset_name]) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count != 0 and nodes_count != 0, "Kuzu graph database should not be empty" + assert nodes_count != 0, "Kuzu graph database should not be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -136,10 +131,9 @@ async def main(): await cognee.prune.prune_system(metadata=True) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + assert nodes_count == 0, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail diff --git a/cognee/tests/test_neo4j.py 
b/cognee/tests/test_neo4j.py index 11f6156bd..6f1fcf975 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -39,10 +39,9 @@ async def main(): graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Graph has to be empty" + assert nodes_count == 0, "Graph has to be empty" await cognee.add([explanation_file_path_nlp], dataset_name) @@ -51,18 +50,15 @@ async def main(): ) await cognee.add([explanation_file_path_quantum], dataset_name) - - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Graph has to be empty before cognify" + assert nodes_count == 0, "Graph has to be empty before cognify" await cognee.cognify([dataset_name]) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count != 0 and nodes_count != 0, "Graph shouldn't be empty" + assert nodes_count != 0, "Graph shouldn't be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -136,9 +132,8 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert nodes_count == 0 and edges_count == 0, "Neo4j graph database is not empty" + assert nodes_count == 0, "Neo4j graph database is not empty" if __name__ == "__main__": diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index bce2ea8be..5ff68cecc 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -199,7 +199,7 @@ if __name__ == "__main__": "prune_data": rebuild_kg, "prune_system": rebuild_kg, "add_text": rebuild_kg, - "cognify": rebuild_kg, + "cognify": False, "graph_metrics": rebuild_kg, "retriever": retrieve, } From a854e4f42689d7c7fb567c6e4b62443fbb818b19 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:22:51 +0100 Subject: [PATCH 29/61] chore: update GraphDBInterface to not throw NotImplementedError for count_nodes() --- cognee/infrastructure/databases/graph/graph_db_interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index a4542cefe..d7542eac6 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -161,7 +161,8 @@ class GraphDBInterface(ABC): @abstractmethod async def count_nodes(self) -> int: - raise NotImplementedError + logger.warning("count_nodes is not implemented") + return 1 # dummy value to not fail search() @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: From c9a3f483987ea78a8ba1f2b199541ff362548638 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 18:26:01 +0200 Subject: [PATCH 30/61] fix: Resolve issue with data element incremental loading for multiple datasets --- cognee/modules/pipelines/operations/run_tasks_data_item.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cognee/modules/pipelines/operations/run_tasks_data_item.py b/cognee/modules/pipelines/operations/run_tasks_data_item.py index 94fc631a8..152e72d7f 100644 --- 
a/cognee/modules/pipelines/operations/run_tasks_data_item.py +++ b/cognee/modules/pipelines/operations/run_tasks_data_item.py @@ -115,9 +115,8 @@ async def run_tasks_data_item_incremental( data_point = ( await session.execute(select(Data).filter(Data.id == data_id)) ).scalar_one_or_none() - data_point.pipeline_status[pipeline_name] = { - str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED - } + status_for_pipeline = data_point.pipeline_status.setdefault(pipeline_name, {}) + status_for_pipeline[str(dataset.id)] = DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED await session.merge(data_point) await session.commit() From 38406a0ab1b3d4d47f8d9fb4e95b4612cd3ce117 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:32:48 +0100 Subject: [PATCH 31/61] chore: remove memgraph from cognee repo --- .../databases/graph/get_graph_engine.py | 2 +- .../graph/memgraph/memgraph_adapter.py | 1116 ----------------- cognee/tests/test_memgraph.py | 105 -- notebooks/neptune-analytics-example.ipynb | 82 +- 4 files changed, 42 insertions(+), 1263 deletions(-) delete mode 100644 cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py delete mode 100644 cognee/tests/test_memgraph.py diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 1861aa15c..1ea61d29f 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -162,5 +162,5 @@ def create_graph_engine( raise EnvironmentError( f"Unsupported graph database provider: {graph_database_provider}. " - f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}" + f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'neptune', 'neptune_analytics'])}" ) diff --git a/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py b/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py deleted file mode 100644 index 3612e3277..000000000 --- a/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +++ /dev/null @@ -1,1116 +0,0 @@ -"""Memgraph Adapter for Graph Database""" - -import json -from cognee.shared.logging_utils import get_logger, ERROR -import asyncio -from textwrap import dedent -from typing import Optional, Any, List, Dict, Type, Tuple -from contextlib import asynccontextmanager -from uuid import UUID -from neo4j import AsyncSession -from neo4j import AsyncGraphDatabase -from neo4j.exceptions import Neo4jError -from cognee.infrastructure.engine import DataPoint -from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface -from cognee.modules.storage.utils import JSONEncoder -from cognee.infrastructure.databases.exceptions.exceptions import NodesetFilterNotSupportedError - -logger = get_logger("MemgraphAdapter", level=ERROR) - - -class MemgraphAdapter(GraphDBInterface): - """ - Handles interaction with a Memgraph database through various graph operations. 
- - Public methods include: - - get_session - - query - - has_node - - add_node - - add_nodes - - extract_node - - extract_nodes - - delete_node - - delete_nodes - - has_edge - - has_edges - - add_edge - - add_edges - - get_edges - - get_disconnected_nodes - - get_predecessors - - get_successors - - get_neighbours - - get_connections - - remove_connection_to_predecessors_of - - remove_connection_to_successors_of - - delete_graph - - serialize_properties - - get_model_independent_graph_data - - get_graph_data - - get_nodeset_subgraph - - get_filtered_graph_data - - get_node_labels_string - - get_relationship_labels_string - - get_graph_metrics - """ - - def __init__( - self, - graph_database_url: str, - graph_database_username: Optional[str] = None, - graph_database_password: Optional[str] = None, - driver: Optional[Any] = None, - ): - # Only use auth if both username and password are provided - auth = None - if graph_database_username and graph_database_password: - auth = (graph_database_username, graph_database_password) - - self.driver = driver or AsyncGraphDatabase.driver( - graph_database_url, - auth=auth, - max_connection_lifetime=120, - ) - - @asynccontextmanager - async def get_session(self) -> AsyncSession: - """ - Manage a session with the database, yielding the session for use in operations. - """ - async with self.driver.session() as session: - yield session - - async def query( - self, - query: str, - params: Optional[Dict[str, Any]] = None, - ) -> List[Dict[str, Any]]: - """ - Execute a provided query on the Memgraph database and return the results. - - Parameters: - ----------- - - - query (str): The Cypher query to be executed against the database. - - params (Optional[Dict[str, Any]]): Optional parameters to be used in the query. - (default None) - - Returns: - -------- - - - List[Dict[str, Any]]: A list of dictionaries representing the result set of the - query. - """ - try: - async with self.get_session() as session: - result = await session.run(query, params) - data = await result.data() - return data - except Neo4jError as error: - logger.error("Memgraph query error: %s", error, exc_info=True) - raise error - - async def has_node(self, node_id: str) -> bool: - """ - Determine if a node with the given ID exists in the database. - - Parameters: - ----------- - - - node_id (str): The ID of the node to check for existence. - - Returns: - -------- - - - bool: True if the node exists; otherwise, False. - """ - results = await self.query( - """ - MATCH (n) - WHERE n.id = $node_id - RETURN COUNT(n) > 0 AS node_exists - """, - {"node_id": node_id}, - ) - return results[0]["node_exists"] if len(results) > 0 else False - - async def add_node(self, node: DataPoint): - """ - Add a new node to the database with specified properties. - - Parameters: - ----------- - - - node (DataPoint): The DataPoint object representing the node to add. - - Returns: - -------- - - The result of the node addition, including its internal ID and node ID. 
- """ - serialized_properties = self.serialize_properties(node.model_dump()) - - query = """ - MERGE (node {id: $node_id}) - ON CREATE SET node:$node_label, node += $properties, node.updated_at = timestamp() - ON MATCH SET node:$node_label, node += $properties, node.updated_at = timestamp() - RETURN ID(node) AS internal_id, node.id AS nodeId - """ - - params = { - "node_id": str(node.id), - "node_label": type(node).__name__, - "properties": serialized_properties, - } - return await self.query(query, params) - - async def add_nodes(self, nodes: list[DataPoint]) -> None: - """ - Add multiple nodes to the database in a single operation. - - Parameters: - ----------- - - - nodes (list[DataPoint]): A list of DataPoint objects representing the nodes to - add. - - Returns: - -------- - - - None: None. - """ - query = """ - UNWIND $nodes AS node - MERGE (n {id: node.node_id}) - ON CREATE SET n:node.label, n += node.properties, n.updated_at = timestamp() - ON MATCH SET n:node.label, n += node.properties, n.updated_at = timestamp() - RETURN ID(n) AS internal_id, n.id AS nodeId - """ - - nodes = [ - { - "node_id": str(node.id), - "label": type(node).__name__, - "properties": self.serialize_properties(node.model_dump()), - } - for node in nodes - ] - - results = await self.query(query, dict(nodes=nodes)) - return results - - async def extract_node(self, node_id: str): - """ - Retrieve a single node based on its ID. - - Parameters: - ----------- - - - node_id (str): The ID of the node to retrieve. - - Returns: - -------- - - The node corresponding to the provided ID, or None if not found. - """ - results = await self.extract_nodes([node_id]) - - return results[0] if len(results) > 0 else None - - async def extract_nodes(self, node_ids: List[str]): - """ - Retrieve multiple nodes based on their IDs. - - Parameters: - ----------- - - - node_ids (List[str]): A list of IDs for the nodes to retrieve. - - Returns: - -------- - - A list of nodes corresponding to the provided IDs. - """ - query = """ - UNWIND $node_ids AS id - MATCH (node {id: id}) - RETURN node""" - - params = {"node_ids": node_ids} - - results = await self.query(query, params) - - return [result["node"] for result in results] - - async def delete_node(self, node_id: str): - """ - Delete a node from the database based on its ID. - - Parameters: - ----------- - - - node_id (str): The ID of the node to delete. - - Returns: - -------- - - None. - """ - sanitized_id = node_id.replace(":", "_") - - query = "MATCH (node: {{id: $node_id}}) DETACH DELETE node" - params = {"node_id": sanitized_id} - - return await self.query(query, params) - - async def delete_nodes(self, node_ids: list[str]) -> None: - """ - Delete multiple nodes from the database based on their IDs. - - Parameters: - ----------- - - - node_ids (list[str]): A list of IDs for the nodes to delete. - - Returns: - -------- - - - None: None. - """ - query = """ - UNWIND $node_ids AS id - MATCH (node {id: id}) - DETACH DELETE node""" - - params = {"node_ids": node_ids} - - return await self.query(query, params) - - async def has_edge(self, from_node: UUID, to_node: UUID, edge_label: str) -> bool: - """ - Check if a directed edge exists between two nodes identified by their IDs. - - Parameters: - ----------- - - - from_node (UUID): The ID of the source node. - - to_node (UUID): The ID of the target node. - - edge_label (str): The label of the edge to check. - - Returns: - -------- - - - bool: True if the edge exists; otherwise, False. 
- """ - query = """ - MATCH (from_node)-[relationship]->(to_node) - WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id AND type(relationship) = $edge_label - RETURN COUNT(relationship) > 0 AS edge_exists - """ - - params = { - "from_node_id": str(from_node), - "to_node_id": str(to_node), - "edge_label": edge_label, - } - - records = await self.query(query, params) - return records[0]["edge_exists"] if records else False - - async def has_edges(self, edges): - """ - Check for the existence of multiple edges based on provided criteria. - - Parameters: - ----------- - - - edges: A list of edges to verify existence for. - - Returns: - -------- - - A list of boolean values indicating the existence of each edge. - """ - query = """ - UNWIND $edges AS edge - MATCH (a)-[r]->(b) - WHERE id(a) = edge.from_node AND id(b) = edge.to_node AND type(r) = edge.relationship_name - RETURN edge.from_node AS from_node, edge.to_node AS to_node, edge.relationship_name AS relationship_name, count(r) > 0 AS edge_exists - """ - - try: - params = { - "edges": [ - { - "from_node": str(edge[0]), - "to_node": str(edge[1]), - "relationship_name": edge[2], - } - for edge in edges - ], - } - - results = await self.query(query, params) - return [result["edge_exists"] for result in results] - except Neo4jError as error: - logger.error("Memgraph query error: %s", error, exc_info=True) - raise error - - async def add_edge( - self, - from_node: UUID, - to_node: UUID, - relationship_name: str, - edge_properties: Optional[Dict[str, Any]] = None, - ): - """ - Add a directed edge between two nodes with optional properties. - - Parameters: - ----------- - - - from_node (UUID): The ID of the source node. - - to_node (UUID): The ID of the target node. - - relationship_name (str): The type/label of the relationship to create. - - edge_properties (Optional[Dict[str, Any]]): Optional properties associated with - the edge. (default None) - - Returns: - -------- - - The result of the edge addition operation, including relationship details. - """ - - exists = await asyncio.gather(self.has_node(str(from_node)), self.has_node(str(to_node))) - - if not all(exists): - return None - - serialized_properties = self.serialize_properties(edge_properties or {}) - - query = dedent( - f"""\ - MATCH (from_node {{id: $from_node}}), - (to_node {{id: $to_node}}) - WHERE from_node IS NOT NULL AND to_node IS NOT NULL - MERGE (from_node)-[r:{relationship_name}]->(to_node) - ON CREATE SET r += $properties, r.updated_at = timestamp() - ON MATCH SET r += $properties, r.updated_at = timestamp() - RETURN r - """ - ) - - params = { - "from_node": str(from_node), - "to_node": str(to_node), - "relationship_name": relationship_name, - "properties": serialized_properties, - } - - return await self.query(query, params) - - async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None: - """ - Batch add multiple edges between nodes, enforcing specified relationships. - - Parameters: - ----------- - - - edges (list[tuple[str, str, str, dict[str, Any]]): A list of tuples containing - specifications for each edge to add. - - Returns: - -------- - - - None: None. 
- """ - query = """ - UNWIND $edges AS edge - MATCH (from_node {id: edge.from_node}) - MATCH (to_node {id: edge.to_node}) - CALL merge.relationship( - from_node, - edge.relationship_name, - { - source_node_id: edge.from_node, - target_node_id: edge.to_node - }, - edge.properties, - to_node, - {} - ) YIELD rel - RETURN rel""" - - edges = [ - { - "from_node": str(edge[0]), - "to_node": str(edge[1]), - "relationship_name": edge[2], - "properties": { - **(edge[3] if edge[3] else {}), - "source_node_id": str(edge[0]), - "target_node_id": str(edge[1]), - }, - } - for edge in edges - ] - - try: - results = await self.query(query, dict(edges=edges)) - return results - except Neo4jError as error: - logger.error("Memgraph query error: %s", error, exc_info=True) - raise error - - async def get_edges(self, node_id: str): - """ - Retrieve all edges connected to a specific node identified by its ID. - - Parameters: - ----------- - - - node_id (str): The ID of the node for which to retrieve connected edges. - - Returns: - -------- - - A list of tuples representing the edges connected to the node. - """ - query = """ - MATCH (n {id: $node_id})-[r]-(m) - RETURN n, r, m - """ - - results = await self.query(query, dict(node_id=node_id)) - - return [ - (result["n"]["id"], result["m"]["id"], {"relationship_name": result["r"][1]}) - for result in results - ] - - async def get_disconnected_nodes(self) -> list[str]: - """ - Identify nodes in the graph that do not belong to the largest connected component. - - Returns: - -------- - - - list[str]: A list of IDs representing the disconnected nodes. - """ - query = """ - // Step 1: Collect all nodes - MATCH (n) - WITH COLLECT(n) AS nodes - - // Step 2: Find all connected components - WITH nodes - CALL { - WITH nodes - UNWIND nodes AS startNode - MATCH path = (startNode)-[*]-(connectedNode) - WITH startNode, COLLECT(DISTINCT connectedNode) AS component - RETURN component - } - - // Step 3: Aggregate components - WITH COLLECT(component) AS components - - // Step 4: Identify the largest connected component - UNWIND components AS component - WITH component - ORDER BY SIZE(component) DESC - LIMIT 1 - WITH component AS largestComponent - - // Step 5: Find nodes not in the largest connected component - MATCH (n) - WHERE NOT n IN largestComponent - RETURN COLLECT(ID(n)) AS ids - """ - - results = await self.query(query) - return results[0]["ids"] if len(results) > 0 else [] - - async def get_predecessors(self, node_id: str, edge_label: str = None) -> list[str]: - """ - Retrieve all predecessors of a node based on its ID and optional edge label. - - Parameters: - ----------- - - - node_id (str): The ID of the node to find predecessors for. - - edge_label (str): Optional edge label to filter predecessors. (default None) - - Returns: - -------- - - - list[str]: A list of predecessor node IDs. 
- """ - if edge_label is not None: - query = """ - MATCH (node)<-[r]-(predecessor) - WHERE node.id = $node_id AND type(r) = $edge_label - RETURN predecessor - """ - - results = await self.query( - query, - dict( - node_id=node_id, - edge_label=edge_label, - ), - ) - - return [result["predecessor"] for result in results] - else: - query = """ - MATCH (node)<-[r]-(predecessor) - WHERE node.id = $node_id - RETURN predecessor - """ - - results = await self.query( - query, - dict( - node_id=node_id, - ), - ) - - return [result["predecessor"] for result in results] - - async def get_successors(self, node_id: str, edge_label: str = None) -> list[str]: - """ - Retrieve all successors of a node based on its ID and optional edge label. - - Parameters: - ----------- - - - node_id (str): The ID of the node to find successors for. - - edge_label (str): Optional edge label to filter successors. (default None) - - Returns: - -------- - - - list[str]: A list of successor node IDs. - """ - if edge_label is not None: - query = """ - MATCH (node)-[r]->(successor) - WHERE node.id = $node_id AND type(r) = $edge_label - RETURN successor - """ - - results = await self.query( - query, - dict( - node_id=node_id, - edge_label=edge_label, - ), - ) - - return [result["successor"] for result in results] - else: - query = """ - MATCH (node)-[r]->(successor) - WHERE node.id = $node_id - RETURN successor - """ - - results = await self.query( - query, - dict( - node_id=node_id, - ), - ) - - return [result["successor"] for result in results] - - async def get_neighbors(self, node_id: str) -> List[Dict[str, Any]]: - """ - Get both predecessors and successors of a node. - - Parameters: - ----------- - - - node_id (str): The ID of the node to find neighbors for. - - Returns: - -------- - - - List[Dict[str, Any]]: A combined list of neighbor node IDs. - """ - predecessors, successors = await asyncio.gather( - self.get_predecessors(node_id), self.get_successors(node_id) - ) - - return predecessors + successors - - async def get_node(self, node_id: str) -> Optional[Dict[str, Any]]: - """Get a single node by ID.""" - query = """ - MATCH (node {id: $node_id}) - RETURN node - """ - results = await self.query(query, {"node_id": node_id}) - return results[0]["node"] if results else None - - async def get_nodes(self, node_ids: List[str]) -> List[Dict[str, Any]]: - """Get multiple nodes by their IDs.""" - query = """ - UNWIND $node_ids AS id - MATCH (node {id: id}) - RETURN node - """ - results = await self.query(query, {"node_ids": node_ids}) - return [result["node"] for result in results] - - async def get_connections(self, node_id: UUID) -> list: - """ - Retrieve connections for a given node, including both predecessors and successors. - - Parameters: - ----------- - - - node_id (UUID): The ID of the node for which to retrieve connections. - - Returns: - -------- - - - list: A list of connections associated with the node. 
- """ - predecessors_query = """ - MATCH (node)<-[relation]-(neighbour) - WHERE node.id = $node_id - RETURN neighbour, relation, node - """ - successors_query = """ - MATCH (node)-[relation]->(neighbour) - WHERE node.id = $node_id - RETURN node, relation, neighbour - """ - - predecessors, successors = await asyncio.gather( - self.query(predecessors_query, dict(node_id=str(node_id))), - self.query(successors_query, dict(node_id=str(node_id))), - ) - - connections = [] - - for neighbour in predecessors: - neighbour = neighbour["relation"] - connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2])) - - for neighbour in successors: - neighbour = neighbour["relation"] - connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2])) - - return connections - - async def remove_connection_to_predecessors_of( - self, node_ids: list[str], edge_label: str - ) -> None: - """ - Remove specified connections to the predecessors of the given node IDs. - - Parameters: - ----------- - - - node_ids (list[str]): A list of node IDs from which to remove predecessor - connections. - - edge_label (str): The label of the edges to remove. - - Returns: - -------- - - - None: None. - """ - query = f""" - UNWIND $node_ids AS nid - MATCH (node {id: nid})-[r]->(predecessor) - WHERE type(r) = $edge_label - DELETE r; - """ - - params = {"node_ids": node_ids, "edge_label": edge_label} - - return await self.query(query, params) - - async def remove_connection_to_successors_of( - self, node_ids: list[str], edge_label: str - ) -> None: - """ - Remove specified connections to the successors of the given node IDs. - - Parameters: - ----------- - - - node_ids (list[str]): A list of node IDs from which to remove successor - connections. - - edge_label (str): The label of the edges to remove. - - Returns: - -------- - - - None: None. - """ - query = f""" - UNWIND $node_ids AS id - MATCH (node:`{id}`)<-[r:{edge_label}]-(successor) - DELETE r; - """ - - params = {"node_ids": node_ids} - - return await self.query(query, params) - - async def delete_graph(self): - """ - Completely delete the graph from the database, removing all nodes and edges. - - Returns: - -------- - - None. - """ - query = """MATCH (node) - DETACH DELETE node;""" - - return await self.query(query) - - def serialize_properties(self, properties=dict()): - """ - Convert property values to a suitable representation for storage. - - Parameters: - ----------- - - - properties: A dictionary of properties to serialize. (default dict()) - - Returns: - -------- - - A dictionary of serialized properties. - """ - serialized_properties = {} - - for property_key, property_value in properties.items(): - if isinstance(property_value, UUID): - serialized_properties[property_key] = str(property_value) - continue - - if isinstance(property_value, dict): - serialized_properties[property_key] = json.dumps(property_value, cls=JSONEncoder) - continue - - serialized_properties[property_key] = property_value - - return serialized_properties - - async def get_model_independent_graph_data(self): - """ - Fetch nodes and relationships without any specific model filtering. - - Returns: - -------- - - A tuple containing nodes and edges as collections. 
- """ - query_nodes = "MATCH (n) RETURN collect(n) AS nodes" - nodes = await self.query(query_nodes) - - query_edges = "MATCH (n)-[r]->(m) RETURN collect([n, r, m]) AS elements" - edges = await self.query(query_edges) - - return (nodes, edges) - - async def get_graph_data(self): - """ - Retrieve all nodes and edges from the graph, including their properties. - - Returns: - -------- - - A tuple containing lists of nodes and edges. - """ - query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties" - - result = await self.query(query) - - nodes = [ - ( - record["id"], - record["properties"], - ) - for record in result - ] - - query = """ - MATCH (n)-[r]->(m) - RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties - """ - result = await self.query(query) - edges = [ - ( - record["source"], - record["target"], - record["type"], - record["properties"], - ) - for record in result - ] - - return (nodes, edges) - - async def get_nodeset_subgraph( - self, node_type: Type[Any], node_name: List[str] - ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]: - """ - Throw an error indicating that node set filtering is not supported. - - Parameters: - ----------- - - - node_type (Type[Any]): The type of nodes to filter. - - node_name (List[str]): A list of node names to filter. - """ - raise NodesetFilterNotSupportedError - - async def get_filtered_graph_data(self, attribute_filters): - """ - Fetch nodes and relationships based on specified attribute filters. - - Parameters: - ----------- - - - attribute_filters: A list of criteria to filter nodes and relationships. - - Returns: - -------- - - A tuple containing filtered nodes and edges. - """ - where_clauses = [] - for attribute, values in attribute_filters[0].items(): - values_str = ", ".join( - f"'{value}'" if isinstance(value, str) else str(value) for value in values - ) - where_clauses.append(f"n.{attribute} IN [{values_str}]") - - where_clause = " AND ".join(where_clauses) - - query_nodes = f""" - MATCH (n) - WHERE {where_clause} - RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties - """ - result_nodes = await self.query(query_nodes) - - nodes = [ - ( - record["id"], - record["properties"], - ) - for record in result_nodes - ] - - query_edges = f""" - MATCH (n)-[r]->(m) - WHERE {where_clause} AND {where_clause.replace("n.", "m.")} - RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties - """ - result_edges = await self.query(query_edges) - - edges = [ - ( - record["source"], - record["target"], - record["type"], - record["properties"], - ) - for record in result_edges - ] - - return (nodes, edges) - - async def get_node_labels_string(self): - """ - Retrieve a string representation of all unique node labels in the graph. - - Returns: - -------- - - A string containing unique node labels. - """ - node_labels_query = """ - MATCH (n) - WITH DISTINCT labels(n) AS labelList - UNWIND labelList AS label - RETURN collect(DISTINCT label) AS labels; - """ - node_labels_result = await self.query(node_labels_query) - node_labels = node_labels_result[0]["labels"] if node_labels_result else [] - - if not node_labels: - raise ValueError("No node labels found in the database") - - node_labels_str = "[" + ", ".join(f"'{label}'" for label in node_labels) + "]" - return node_labels_str - - async def get_relationship_labels_string(self): - """ - Retrieve a string representation of all unique relationship types in the graph. 
- - Returns: - -------- - - A string containing unique relationship types. - """ - relationship_types_query = ( - "MATCH ()-[r]->() RETURN collect(DISTINCT type(r)) AS relationships;" - ) - relationship_types_result = await self.query(relationship_types_query) - relationship_types = ( - relationship_types_result[0]["relationships"] if relationship_types_result else [] - ) - - if not relationship_types: - raise ValueError("No relationship types found in the database.") - - relationship_types_undirected_str = ( - "{" - + ", ".join(f"{rel}" + ": {orientation: 'UNDIRECTED'}" for rel in relationship_types) - + "}" - ) - return relationship_types_undirected_str - - async def get_graph_metrics(self, include_optional=False): - """ - Calculate and return various metrics of the graph, including mandatory and optional - metrics. - - Parameters: - ----------- - - - include_optional: Specify whether to include optional metrics in the results. - (default False) - - Returns: - -------- - - A dictionary containing calculated graph metrics. - """ - - try: - # Basic metrics - node_count = await self.query("MATCH (n) RETURN count(n)") - edge_count = await self.query("MATCH ()-[r]->() RETURN count(r)") - num_nodes = node_count[0][0] if node_count else 0 - num_edges = edge_count[0][0] if edge_count else 0 - - # Calculate mandatory metrics - mandatory_metrics = { - "num_nodes": num_nodes, - "num_edges": num_edges, - "mean_degree": (2 * num_edges) / num_nodes if num_nodes > 0 else 0, - "edge_density": (num_edges) / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0, - } - - # Calculate connected components - components_query = """ - MATCH (n:Node) - WITH n.id AS node_id - MATCH path = (n)-[:EDGE*0..]-() - WITH COLLECT(DISTINCT node_id) AS component - RETURN COLLECT(component) AS components - """ - components_result = await self.query(components_query) - component_sizes = ( - [len(comp) for comp in components_result[0][0]] if components_result else [] - ) - - mandatory_metrics.update( - { - "num_connected_components": len(component_sizes), - "sizes_of_connected_components": component_sizes, - } - ) - - if include_optional: - # Self-loops - self_loops_query = """ - MATCH (n:Node)-[r:EDGE]->(n) - RETURN COUNT(r) - """ - self_loops = await self.query(self_loops_query) - num_selfloops = self_loops[0][0] if self_loops else 0 - - # Shortest paths (simplified for Kuzu) - paths_query = """ - MATCH (n:Node), (m:Node) - WHERE n.id < m.id - MATCH path = (n)-[:EDGE*]-(m) - RETURN MIN(LENGTH(path)) AS length - """ - paths = await self.query(paths_query) - path_lengths = [p[0] for p in paths if p[0] is not None] - - # Local clustering coefficient - clustering_query = """ - /// Step 1: Get each node with its neighbors and degree - MATCH (n:Node)-[:EDGE]-(neighbor) - WITH n, COLLECT(DISTINCT neighbor) AS neighbors, COUNT(DISTINCT neighbor) AS degree - - // Step 2: Pair up neighbors and check if they are connected - UNWIND neighbors AS n1 - UNWIND neighbors AS n2 - WITH n, degree, n1, n2 - WHERE id(n1) < id(n2) // avoid duplicate pairs - - // Step 3: Use OPTIONAL MATCH to see if n1 and n2 are connected - OPTIONAL MATCH (n1)-[:EDGE]-(n2) - WITH n, degree, COUNT(n2) AS triangle_count - - // Step 4: Compute local clustering coefficient - WITH n, degree, - CASE WHEN degree <= 1 THEN 0.0 - ELSE (1.0 * triangle_count) / (degree * (degree - 1) / 2.0) - END AS local_cc - - // Step 5: Compute average - RETURN AVG(local_cc) AS avg_clustering_coefficient - """ - clustering = await self.query(clustering_query) - - optional_metrics = { - 
"num_selfloops": num_selfloops, - "diameter": max(path_lengths) if path_lengths else -1, - "avg_shortest_path_length": sum(path_lengths) / len(path_lengths) - if path_lengths - else -1, - "avg_clustering": clustering[0][0] if clustering and clustering[0][0] else -1, - } - else: - optional_metrics = { - "num_selfloops": -1, - "diameter": -1, - "avg_shortest_path_length": -1, - "avg_clustering": -1, - } - - return {**mandatory_metrics, **optional_metrics} - - except Exception as e: - logger.error(f"Failed to get graph metrics: {e}") - return { - "num_nodes": 0, - "num_edges": 0, - "mean_degree": 0, - "edge_density": 0, - "num_connected_components": 0, - "sizes_of_connected_components": [], - "num_selfloops": -1, - "diameter": -1, - "avg_shortest_path_length": -1, - "avg_clustering": -1, - } diff --git a/cognee/tests/test_memgraph.py b/cognee/tests/test_memgraph.py deleted file mode 100644 index d0d968fc4..000000000 --- a/cognee/tests/test_memgraph.py +++ /dev/null @@ -1,105 +0,0 @@ -import os - -import pathlib -import cognee -from cognee.infrastructure.files.storage import get_storage_config -from cognee.modules.search.operations import get_history -from cognee.modules.users.methods import get_default_user -from cognee.shared.logging_utils import get_logger -from cognee.modules.search.types import SearchType - - -logger = get_logger() - - -async def main(): - cognee.config.set_graph_database_provider("memgraph") - data_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_memgraph") - ).resolve() - ) - cognee.config.data_root_directory(data_directory_path) - cognee_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_memgraph") - ).resolve() - ) - cognee.config.system_root_directory(cognee_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - - dataset_name = "cs_explanations" - - explanation_file_path_nlp = os.path.join( - pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" - ) - await cognee.add([explanation_file_path_nlp], dataset_name) - - explanation_file_path_quantum = os.path.join( - pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" - ) - - await cognee.add([explanation_file_path_quantum], dataset_name) - - await cognee.cognify([dataset_name]) - - from cognee.infrastructure.databases.vector import get_vector_engine - - vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] - random_node_name = random_node.payload["text"] - - search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name - ) - assert len(search_results) != 0, "The search results list is empty." - print("\n\nExtracted sentences are:\n") - for result in search_results: - print(f"{result}\n") - - search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name) - assert len(search_results) != 0, "The search results list is empty." - print("\n\nExtracted chunks are:\n") - for result in search_results: - print(f"{result}\n") - - search_results = await cognee.search( - query_type=SearchType.SUMMARIES, query_text=random_node_name - ) - assert len(search_results) != 0, "Query related summaries don't exist." 
- print("\nExtracted results are:\n") - for result in search_results: - print(f"{result}\n") - - search_results = await cognee.search( - query_type=SearchType.NATURAL_LANGUAGE, - query_text=f"Find nodes connected to node with name {random_node_name}", - ) - assert len(search_results) != 0, "Query related natural language don't exist." - print("\nExtracted results are:\n") - for result in search_results: - print(f"{result}\n") - - user = await get_default_user() - history = await get_history(user.id) - - assert len(history) == 8, "Search history is not correct." - - await cognee.prune.prune_data() - data_root_directory = get_storage_config()["data_root_directory"] - assert not os.path.isdir(data_root_directory), "Local data files are not deleted" - - await cognee.prune.prune_system(metadata=True) - from cognee.infrastructure.databases.graph import get_graph_engine - - graph_engine = await get_graph_engine() - nodes, edges = await graph_engine.get_graph_data() - assert len(nodes) == 0 and len(edges) == 0, "Memgraph graph database is not empty" - - -if __name__ == "__main__": - import asyncio - - asyncio.run(main()) diff --git a/notebooks/neptune-analytics-example.ipynb b/notebooks/neptune-analytics-example.ipynb index e80ea4dcb..c85ccf58a 100644 --- a/notebooks/neptune-analytics-example.ipynb +++ b/notebooks/neptune-analytics-example.ipynb @@ -83,16 +83,16 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import os\n", "import pathlib\n", "from cognee import config, add, cognify, search, SearchType, prune, visualize_graph\n", "from dotenv import load_dotenv" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -106,7 +106,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# load environment variables from file .env\n", "load_dotenv()\n", @@ -145,9 +147,7 @@ " \"vector_db_url\": f\"neptune-graph://{graph_identifier}\", # Neptune Analytics endpoint with the format neptune-graph://\n", " }\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -159,19 +159,19 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Prune data and system metadata before running, only if we want \"fresh\" state.\n", "await prune.prune_data()\n", "await prune.prune_system(metadata=True)" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## Setup data and cognify\n", "\n", @@ -180,7 +180,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Add sample text to the dataset\n", "sample_text_1 = \"\"\"Neptune Analytics is a memory-optimized graph database engine for analytics. 
With Neptune\n", @@ -205,9 +207,7 @@ "\n", "# Cognify the text data.\n", "await cognify([dataset_name])" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -215,14 +215,16 @@ "source": [ "## Graph Memory visualization\n", "\n", - "Initialize Memgraph as a Graph Memory store and save to .artefacts/graph_visualization.html\n", + "Initialize Neptune as a Graph Memory store and save to .artefacts/graph_visualization.html\n", "\n", "![visualization](./neptune_analytics_demo.png)" ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Get a graphistry url (Register for a free account at https://www.graphistry.com)\n", "# url = await render_graph()\n", @@ -235,9 +237,7 @@ " ).resolve()\n", ")\n", "await visualize_graph(graph_file_path)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -250,19 +250,19 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Completion query that uses graph data to form context.\n", "graph_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.GRAPH_COMPLETION)\n", "print(\"\\nGraph completion result is:\")\n", "print(graph_completion)" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## SEARCH: RAG Completion\n", "\n", @@ -271,19 +271,19 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Completion query that uses document chunks to form context.\n", "rag_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.RAG_COMPLETION)\n", "print(\"\\nRAG Completion result is:\")\n", "print(rag_completion)" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## SEARCH: Graph Insights\n", "\n", @@ -291,8 +291,10 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Search graph insights\n", "insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.GRAPH_COMPLETION)\n", @@ -302,13 +304,11 @@ " tgt_node = result[2].get(\"name\", result[2][\"type\"])\n", " relationship = result[1].get(\"relationship_name\", \"__relationship__\")\n", " print(f\"- {src_node} -[{relationship}]-> {tgt_node}\")" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## SEARCH: Entity Summaries\n", "\n", @@ -316,8 +316,10 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Query all summaries related to query.\n", "summaries = await search(query_text=\"Neptune Analytics\", query_type=SearchType.SUMMARIES)\n", @@ -326,13 +328,11 @@ " type = summary[\"type\"]\n", " text = summary[\"text\"]\n", " print(f\"- {type}: {text}\")" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## SEARCH: Chunks\n", "\n", @@ -340,8 +340,10 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "chunks = await search(query_text=\"Neptune Analytics\", query_type=SearchType.CHUNKS)\n", "print(\"\\nChunk results are:\")\n", @@ -349,9 +351,7 @@ " type = chunk[\"type\"]\n", " text = 
chunk[\"text\"]\n", " print(f\"- {type}: {text}\")" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { From 2a6256634e2829a63e11f2c5de9f7d0ad7dac44f Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:35:46 +0100 Subject: [PATCH 32/61] chore: revert temporary change to dynamic_steps_example.py --- examples/python/dynamic_steps_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index 5ff68cecc..bce2ea8be 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -199,7 +199,7 @@ if __name__ == "__main__": "prune_data": rebuild_kg, "prune_system": rebuild_kg, "add_text": rebuild_kg, - "cognify": False, + "cognify": rebuild_kg, "graph_metrics": rebuild_kg, "retriever": retrieve, } From 99dc35f23e26e4cd2016f50a6c783f6a0a1749e1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 20:01:09 +0200 Subject: [PATCH 33/61] fix: resolve issue with neo4j metrics test --- .../tasks/descriptive_metrics/metrics_test_utils.py | 2 -- .../tasks/descriptive_metrics/neo4j_metrics_test.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py b/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py index 911d9c33b..579a499fd 100644 --- a/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +++ b/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py @@ -1,7 +1,6 @@ from typing import List from cognee.infrastructure.engine import DataPoint from cognee.tasks.storage.add_data_points import add_data_points -from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine import cognee from cognee.infrastructure.databases.graph import get_graph_engine import json @@ -64,7 +63,6 @@ async def create_connected_test_graph(): async def get_metrics(provider: str, include_optional=True): - create_graph_engine.cache_clear() cognee.config.set_graph_database_provider(provider) graph_engine = await get_graph_engine() await graph_engine.delete_graph() diff --git a/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py b/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py index 2ca9e9f7e..8d7a6ab02 100644 --- a/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +++ b/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py @@ -1,7 +1,12 @@ -from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import assert_metrics import asyncio +async def main(): + from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import assert_metrics + + await assert_metrics(provider="neo4j", include_optional=False) + await assert_metrics(provider="neo4j", include_optional=True) + + if __name__ == "__main__": - asyncio.run(assert_metrics(provider="neo4j", include_optional=False)) - asyncio.run(assert_metrics(provider="neo4j", include_optional=True)) + asyncio.run(main()) From 2fb06e07299a53c2e5412cbe30e851b26e97b783 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 20:18:48 +0200 Subject: [PATCH 34/61] refactor: forwarding of data batch size rework --- cognee/api/v1/add/add.py | 2 ++ cognee/api/v1/cognify/cognify.py | 2 ++ cognee/modules/pipelines/operations/pipeline.py | 5 ++++- cognee/modules/pipelines/operations/run_tasks.py | 12 +++++------- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 
65394f1ec..b5a8a230f 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -41,6 +41,7 @@ async def add( extraction_rules: Optional[Dict[str, Any]] = None, tavily_config: Optional[BaseModel] = None, soup_crawler_config: Optional[BaseModel] = None, + data_batch_size: Optional[int] = 20, ): """ Add data to Cognee for knowledge graph processing. @@ -235,6 +236,7 @@ async def add( vector_db_config=vector_db_config, graph_db_config=graph_db_config, incremental_loading=incremental_loading, + data_batch_size=data_batch_size, ): pipeline_run_info = run_info diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index c3045f00a..ab5e4a023 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -51,6 +51,7 @@ async def cognify( incremental_loading: bool = True, custom_prompt: Optional[str] = None, temporal_cognify: bool = False, + data_batch_size: int = 20, ): """ Transform ingested data into a structured knowledge graph. @@ -228,6 +229,7 @@ async def cognify( graph_db_config=graph_db_config, incremental_loading=incremental_loading, pipeline_name="cognify_pipeline", + data_batch_size=data_batch_size, ) diff --git a/cognee/modules/pipelines/operations/pipeline.py b/cognee/modules/pipelines/operations/pipeline.py index b59a171f7..9d61235c1 100644 --- a/cognee/modules/pipelines/operations/pipeline.py +++ b/cognee/modules/pipelines/operations/pipeline.py @@ -35,6 +35,7 @@ async def run_pipeline( vector_db_config: dict = None, graph_db_config: dict = None, incremental_loading: bool = False, + data_batch_size: int = 20, ): validate_pipeline_tasks(tasks) await setup_and_check_environment(vector_db_config, graph_db_config) @@ -50,6 +51,7 @@ async def run_pipeline( pipeline_name=pipeline_name, context={"dataset": dataset}, incremental_loading=incremental_loading, + data_batch_size=data_batch_size, ): yield run_info @@ -62,6 +64,7 @@ async def run_pipeline_per_dataset( pipeline_name: str = "custom_pipeline", context: dict = None, incremental_loading=False, + data_batch_size: int = 20, ): # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True await set_database_global_context_variables(dataset.id, dataset.owner_id) @@ -77,7 +80,7 @@ async def run_pipeline_per_dataset( return pipeline_run = run_tasks( - tasks, dataset.id, data, user, pipeline_name, context, incremental_loading + tasks, dataset.id, data, user, pipeline_name, context, incremental_loading, data_batch_size ) async for pipeline_run_info in pipeline_run: diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 2e0055384..18eaf8011 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -24,14 +24,11 @@ from cognee.modules.pipelines.operations import ( log_pipeline_run_complete, log_pipeline_run_error, ) -from .run_tasks_with_telemetry import run_tasks_with_telemetry from .run_tasks_data_item import run_tasks_data_item from ..tasks.task import Task logger = get_logger("run_tasks(tasks: [Task], data)") -# TODO: See if this parameter should be configurable as input for run_tasks itself -DOCUMENT_BATCH_SIZE = 10 def override_run_tasks(new_gen): @@ -62,6 +59,7 @@ async def run_tasks( pipeline_name: str = "unknown_pipeline", context: dict = None, incremental_loading: bool = False, + data_batch_size: int = 20, ): if not user: user = await get_default_user() @@ -93,12 +91,12 @@ async def run_tasks( # Create and gather batches of async tasks of 
data items that will run the pipeline for the data item results = [] - for start in range(0, len(data), DOCUMENT_BATCH_SIZE): - document_batch = data[start : start + DOCUMENT_BATCH_SIZE] + for start in range(0, len(data), data_batch_size): + data_batch = data[start : start + data_batch_size] data_item_tasks = [ asyncio.create_task( - _run_tasks_data_item( + run_tasks_data_item( data_item, dataset, tasks, @@ -110,7 +108,7 @@ async def run_tasks( incremental_loading, ) ) - for data_item in document_batch + for data_item in data_batch ] results.extend(await asyncio.gather(*data_item_tasks)) From 3a9022a26c1e26b1b70867b441096b266f884cc5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 20:22:29 +0200 Subject: [PATCH 35/61] refactor: Rename batch size for tasks to chunk batch size --- cognee/api/v1/cognify/cognify.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index d29d8c939..e0f6253d8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -44,7 +44,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - batch_size: int = None, + chunk_batch_size: int = None, config: Config = None, vector_db_config: dict = None, graph_db_config: dict = None, @@ -106,7 +106,7 @@ async def cognify( Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Default limits: ~512-8192 tokens depending on models. Smaller chunks = more granular but potentially fragmented knowledge. - batch_size: Number of chunks to be processed in a single batch in Cognify tasks. + chunk_batch_size: Number of chunks to be processed in a single batch in Cognify tasks. vector_db_config: Custom vector database configuration for embeddings storage. graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. 
@@ -212,7 +212,7 @@ async def cognify( if temporal_cognify: tasks = await get_temporal_tasks( - user=user, chunker=chunker, chunk_size=chunk_size, batch_size=batch_size + user=user, chunker=chunker, chunk_size=chunk_size, chunk_batch_size=chunk_batch_size ) else: tasks = await get_default_tasks( @@ -222,7 +222,7 @@ async def cognify( chunk_size=chunk_size, config=config, custom_prompt=custom_prompt, - batch_size=batch_size, + chunk_batch_size=chunk_batch_size, ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -248,7 +248,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's chunk_size: int = None, config: Config = None, custom_prompt: Optional[str] = None, - batch_size: int = 100, + chunk_batch_size: int = 100, ) -> list[Task]: if config is None: ontology_config = get_ontology_env_config() @@ -267,8 +267,8 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} } - if batch_size is None: - batch_size = 100 + if chunk_batch_size is None: + chunk_batch_size = 100 default_tasks = [ Task(classify_documents), @@ -283,20 +283,20 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": batch_size}, + task_config={"batch_size": chunk_batch_size}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": batch_size}, + task_config={"batch_size": chunk_batch_size}, ), - Task(add_data_points, task_config={"batch_size": batch_size}), + Task(add_data_points, task_config={"batch_size": chunk_batch_size}), ] return default_tasks async def get_temporal_tasks( - user: User = None, chunker=TextChunker, chunk_size: int = None, batch_size: int = 10 + user: User = None, chunker=TextChunker, chunk_size: int = None, chunk_batch_size: int = 10 ) -> list[Task]: """ Builds and returns a list of temporal processing tasks to be executed in sequence. @@ -313,13 +313,13 @@ async def get_temporal_tasks( user (User, optional): The user requesting task execution, used for permission checks. chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. - batch_size (int, optional): Number of chunks to process in a single batch in Cognify + chunk_batch_size (int, optional): Number of chunks to process in a single batch in Cognify Returns: list[Task]: A list of Task objects representing the temporal processing pipeline. 
""" - if batch_size is None: - batch_size = 10 + if chunk_batch_size is None: + chunk_batch_size = 10 temporal_tasks = [ Task(classify_documents), @@ -329,9 +329,9 @@ async def get_temporal_tasks( max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), - Task(extract_events_and_timestamps, task_config={"batch_size": batch_size}), + Task(extract_events_and_timestamps, task_config={"batch_size": chunk_batch_size}), Task(extract_knowledge_graph_from_events), - Task(add_data_points, task_config={"batch_size": batch_size}), + Task(add_data_points, task_config={"batch_size": chunk_batch_size}), ] return temporal_tasks From a210bd59054dd353675589c63e57fe9d7349b766 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 20:24:36 +0200 Subject: [PATCH 36/61] refactor: rename chunk_batch_size to chunks_per_batch --- cognee/api/v1/cognify/cognify.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e0f6253d8..1d5c36a3c 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -44,7 +44,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - chunk_batch_size: int = None, + chunks_per_batch: int = None, config: Config = None, vector_db_config: dict = None, graph_db_config: dict = None, @@ -106,7 +106,7 @@ async def cognify( Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Default limits: ~512-8192 tokens depending on models. Smaller chunks = more granular but potentially fragmented knowledge. - chunk_batch_size: Number of chunks to be processed in a single batch in Cognify tasks. + chunks_per_batch: Number of chunks to be processed in a single batch in Cognify tasks. vector_db_config: Custom vector database configuration for embeddings storage. graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. 
@@ -212,7 +212,7 @@ async def cognify( if temporal_cognify: tasks = await get_temporal_tasks( - user=user, chunker=chunker, chunk_size=chunk_size, chunk_batch_size=chunk_batch_size + user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch ) else: tasks = await get_default_tasks( @@ -222,7 +222,7 @@ async def cognify( chunk_size=chunk_size, config=config, custom_prompt=custom_prompt, - chunk_batch_size=chunk_batch_size, + chunks_per_batch=chunks_per_batch, ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -248,7 +248,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's chunk_size: int = None, config: Config = None, custom_prompt: Optional[str] = None, - chunk_batch_size: int = 100, + chunks_per_batch: int = 100, ) -> list[Task]: if config is None: ontology_config = get_ontology_env_config() @@ -267,8 +267,8 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} } - if chunk_batch_size is None: - chunk_batch_size = 100 + if chunks_per_batch is None: + chunks_per_batch = 100 default_tasks = [ Task(classify_documents), @@ -283,20 +283,20 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": chunk_batch_size}, + task_config={"batch_size": chunks_per_batch}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": chunk_batch_size}, + task_config={"batch_size": chunks_per_batch}, ), - Task(add_data_points, task_config={"batch_size": chunk_batch_size}), + Task(add_data_points, task_config={"batch_size": chunks_per_batch}), ] return default_tasks async def get_temporal_tasks( - user: User = None, chunker=TextChunker, chunk_size: int = None, chunk_batch_size: int = 10 + user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10 ) -> list[Task]: """ Builds and returns a list of temporal processing tasks to be executed in sequence. @@ -313,13 +313,13 @@ async def get_temporal_tasks( user (User, optional): The user requesting task execution, used for permission checks. chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. - chunk_batch_size (int, optional): Number of chunks to process in a single batch in Cognify + chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify Returns: list[Task]: A list of Task objects representing the temporal processing pipeline. 
""" - if chunk_batch_size is None: - chunk_batch_size = 10 + if chunks_per_batch is None: + chunks_per_batch = 10 temporal_tasks = [ Task(classify_documents), @@ -329,9 +329,9 @@ async def get_temporal_tasks( max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), - Task(extract_events_and_timestamps, task_config={"batch_size": chunk_batch_size}), + Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}), Task(extract_knowledge_graph_from_events), - Task(add_data_points, task_config={"batch_size": chunk_batch_size}), + Task(add_data_points, task_config={"batch_size": chunks_per_batch}), ] return temporal_tasks From 2e1bfe78b1d63b2b089235d2cc7a7742a208d3f5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 15 Oct 2025 20:26:59 +0200 Subject: [PATCH 37/61] refactor: rename variable to be more understandable --- cognee/api/v1/add/add.py | 4 ++-- cognee/api/v1/cognify/cognify.py | 4 ++-- cognee/modules/pipelines/operations/pipeline.py | 8 ++++---- cognee/modules/pipelines/operations/run_tasks.py | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index b5a8a230f..0f14683f9 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -41,7 +41,7 @@ async def add( extraction_rules: Optional[Dict[str, Any]] = None, tavily_config: Optional[BaseModel] = None, soup_crawler_config: Optional[BaseModel] = None, - data_batch_size: Optional[int] = 20, + data_per_batch: Optional[int] = 20, ): """ Add data to Cognee for knowledge graph processing. @@ -236,7 +236,7 @@ async def add( vector_db_config=vector_db_config, graph_db_config=graph_db_config, incremental_loading=incremental_loading, - data_batch_size=data_batch_size, + data_per_batch=data_per_batch, ): pipeline_run_info = run_info diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index ab5e4a023..1eb266765 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -51,7 +51,7 @@ async def cognify( incremental_loading: bool = True, custom_prompt: Optional[str] = None, temporal_cognify: bool = False, - data_batch_size: int = 20, + data_per_batch: int = 20, ): """ Transform ingested data into a structured knowledge graph. 
@@ -229,7 +229,7 @@ async def cognify( graph_db_config=graph_db_config, incremental_loading=incremental_loading, pipeline_name="cognify_pipeline", - data_batch_size=data_batch_size, + data_per_batch=data_per_batch, ) diff --git a/cognee/modules/pipelines/operations/pipeline.py b/cognee/modules/pipelines/operations/pipeline.py index 9d61235c1..e15e9e505 100644 --- a/cognee/modules/pipelines/operations/pipeline.py +++ b/cognee/modules/pipelines/operations/pipeline.py @@ -35,7 +35,7 @@ async def run_pipeline( vector_db_config: dict = None, graph_db_config: dict = None, incremental_loading: bool = False, - data_batch_size: int = 20, + data_per_batch: int = 20, ): validate_pipeline_tasks(tasks) await setup_and_check_environment(vector_db_config, graph_db_config) @@ -51,7 +51,7 @@ async def run_pipeline( pipeline_name=pipeline_name, context={"dataset": dataset}, incremental_loading=incremental_loading, - data_batch_size=data_batch_size, + data_per_batch=data_per_batch, ): yield run_info @@ -64,7 +64,7 @@ async def run_pipeline_per_dataset( pipeline_name: str = "custom_pipeline", context: dict = None, incremental_loading=False, - data_batch_size: int = 20, + data_per_batch: int = 20, ): # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True await set_database_global_context_variables(dataset.id, dataset.owner_id) @@ -80,7 +80,7 @@ async def run_pipeline_per_dataset( return pipeline_run = run_tasks( - tasks, dataset.id, data, user, pipeline_name, context, incremental_loading, data_batch_size + tasks, dataset.id, data, user, pipeline_name, context, incremental_loading, data_per_batch ) async for pipeline_run_info in pipeline_run: diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 18eaf8011..ecc2f647b 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -59,7 +59,7 @@ async def run_tasks( pipeline_name: str = "unknown_pipeline", context: dict = None, incremental_loading: bool = False, - data_batch_size: int = 20, + data_per_batch: int = 20, ): if not user: user = await get_default_user() @@ -91,8 +91,8 @@ async def run_tasks( # Create and gather batches of async tasks of data items that will run the pipeline for the data item results = [] - for start in range(0, len(data), data_batch_size): - data_batch = data[start : start + data_batch_size] + for start in range(0, len(data), data_per_batch): + data_batch = data[start : start + data_per_batch] data_item_tasks = [ asyncio.create_task( From 88cc7af4d7b41b764ddb9db8517ddd56d04677a8 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 16 Oct 2025 10:50:50 +0200 Subject: [PATCH 38/61] test: Add a few more examples to the workflow. 
--- .github/workflows/examples_tests.yml | 53 ++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 406420351..df007a576 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -85,8 +85,8 @@ jobs: run: uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py - test-multiple-examples: - name: Run Multiple Example Scripts + test-dynamic-steps-metrics: + name: Run Dynamic Steps Example runs-on: ubuntu-22.04 steps: - name: Check out repository @@ -97,7 +97,7 @@ jobs: with: python-version: '3.11.x' - - name: Run Dynamic Steps Example + - name: Run Dynamic Steps Tests env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -110,6 +110,18 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./examples/python/dynamic_steps_example.py + test-temporal-example: + name: Run Temporal Tests + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + - name: Run Temporal Example env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -123,6 +135,18 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./examples/python/temporal_example.py + test-ontology-example: + name: Run Ontology Tests + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + - name: Run Ontology Demo Example env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -136,18 +160,17 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./examples/python/ontology_demo_example.py - - name: Run Temporal Example - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - LLM_MODEL: ${{ secrets.LLM_MODEL }} - LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} - LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} - EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} - EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} - EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} - EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: uv run python ./examples/python/temporal_example.py + test-agentic-reasoning: + name: Run Agentic Reasoning Tests + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' - name: Run Agentic Reasoning Example env: From 9821a01a478aab77d08470b9e5a87a7a23e6c750 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:48:20 +0200 Subject: [PATCH 39/61] feat: Redis lock integration and Kuzu agentic access fix (#1504) ## Description This PR introduces a shared locked mechanism in KuzuAdapter to avoid use case when multiple subprocesses from different environments are trying to use the same Kuzu adatabase. 
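For reviewers, here is a minimal usage sketch of the lock this PR adds. The `do_kuzu_work()` helper and the `localhost:6379` / `"kuzu-lock-example"` values are illustrative placeholders, not part of the change; the constructor signature and defaults come from the new `RedisAdapter`:

```python
from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter


def do_kuzu_work():
    """Placeholder for the Kuzu query/write that must be serialized."""
    ...


# One lock per Kuzu database; KuzuAdapter derives the key as "kuzu-lock-" + uuid5(NAMESPACE_OID, db_path).
lock = RedisAdapter(host="localhost", port=6379, lock_name="kuzu-lock-example")

lock.acquire_lock()      # blocks up to blocking_timeout (default 300 s), auto-expires after timeout (default 240 s)
try:
    do_kuzu_work()
finally:
    lock.release_lock()  # release promptly so other subprocesses can proceed
```

When `SHARED_KUZU_LOCK` is enabled, `KuzuAdapter.query()` wraps each blocking query in the same acquire/release pair, so concurrent subprocesses take turns on the shared database.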
## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [x] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) None ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- .github/actions/cognee_setup/action.yml | 2 +- .github/workflows/e2e_tests.yml | 67 + .../databases/cache/__init__.py | 2 + .../databases/cache/cache_db_interface.py | 42 + .../infrastructure/databases/cache/config.py | 39 + .../databases/cache/get_cache_engine.py | 59 + .../databases/cache/redis/RedisAdapter.py | 49 + .../databases/graph/kuzu/adapter.py | 100 +- cognee/tests/subprocesses/reader.py | 25 + cognee/tests/subprocesses/simple_cognify_1.py | 31 + cognee/tests/subprocesses/simple_cognify_2.py | 31 + cognee/tests/subprocesses/writer.py | 32 + .../test_concurrent_subprocess_access.py | 84 ++ .../databases/cache/test_cache_config.py | 87 ++ docker-compose.yml | 25 + poetry.lock | 1316 +++++++++++++---- pyproject.toml | 1 + uv.lock | 19 +- .../run_subprocess_test.py | 31 + 19 files changed, 1671 insertions(+), 371 deletions(-) create mode 100644 cognee/infrastructure/databases/cache/__init__.py create mode 100644 cognee/infrastructure/databases/cache/cache_db_interface.py create mode 100644 cognee/infrastructure/databases/cache/config.py create mode 100644 cognee/infrastructure/databases/cache/get_cache_engine.py create mode 100644 cognee/infrastructure/databases/cache/redis/RedisAdapter.py create mode 100644 cognee/tests/subprocesses/reader.py create mode 100644 cognee/tests/subprocesses/simple_cognify_1.py create mode 100644 cognee/tests/subprocesses/simple_cognify_2.py create mode 100644 cognee/tests/subprocesses/writer.py create mode 100644 cognee/tests/test_concurrent_subprocess_access.py create mode 100644 cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py create mode 100644 working_dir_error_replication/run_subprocess_test.py diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 1326f2d81..4017d524b 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -41,4 +41,4 @@ runs: EXTRA_ARGS="$EXTRA_ARGS --extra $extra" done fi - uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS + uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 3fe7a7992..9582a3f3b 100644 --- a/.github/workflows/e2e_tests.yml 
+++ b/.github/workflows/e2e_tests.yml @@ -1,4 +1,6 @@ name: Reusable Integration Tests +permissions: + contents: read on: workflow_call: @@ -264,3 +266,68 @@ jobs: EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_edge_ingestion.py + + + + run_concurrent_subprocess_access_test: + name: Concurrent Subprocess access test + runs-on: ubuntu-latest + defaults: + run: + shell: bash + services: + postgres: + image: pgvector/pgvector:pg17 + env: + POSTGRES_USER: cognee + POSTGRES_PASSWORD: cognee + POSTGRES_DB: cognee_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 3s + --health-retries 5 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + extra-dependencies: "postgres redis" + + - name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres) + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'kuzu' + CACHING: true + SHARED_KUZU_LOCK: true + DB_PROVIDER: 'postgres' + DB_NAME: 'cognee_db' + DB_HOST: '127.0.0.1' + DB_PORT: 5432 + DB_USERNAME: cognee + DB_PASSWORD: cognee + run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py \ No newline at end of file diff --git a/cognee/infrastructure/databases/cache/__init__.py b/cognee/infrastructure/databases/cache/__init__.py new file mode 100644 index 000000000..d96c77658 --- /dev/null +++ b/cognee/infrastructure/databases/cache/__init__.py @@ -0,0 +1,2 @@ +from .get_cache_engine import get_cache_engine +from .config import get_cache_config diff --git a/cognee/infrastructure/databases/cache/cache_db_interface.py b/cognee/infrastructure/databases/cache/cache_db_interface.py new file mode 100644 index 000000000..0c0b578f8 --- /dev/null +++ b/cognee/infrastructure/databases/cache/cache_db_interface.py @@ -0,0 +1,42 @@ +from abc import ABC, abstractmethod +from contextlib import contextmanager + + +class CacheDBInterface(ABC): + """ + Abstract base class for distributed cache coordination systems (e.g., Redis, Memcached). + Provides a common interface for lock acquisition, release, and context-managed locking. + """ + + def __init__(self, host: str, port: int, lock_key: str): + self.host = host + self.port = port + self.lock_key = lock_key + self.lock = None + + @abstractmethod + def acquire_lock(self): + """ + Acquire a lock on the given key. + Must be implemented by subclasses. + """ + pass + + @abstractmethod + def release_lock(self): + """ + Release the lock if it is held. + Must be implemented by subclasses. + """ + pass + + @contextmanager + def hold_lock(self): + """ + Context manager for safely acquiring and releasing the lock. 
+        """
+        self.acquire_lock()
+        try:
+            yield
+        finally:
+            self.release_lock()
diff --git a/cognee/infrastructure/databases/cache/config.py b/cognee/infrastructure/databases/cache/config.py
new file mode 100644
index 000000000..b399e0259
--- /dev/null
+++ b/cognee/infrastructure/databases/cache/config.py
@@ -0,0 +1,39 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from functools import lru_cache
+
+
+class CacheConfig(BaseSettings):
+    """
+    Configuration for distributed cache systems (e.g., Redis), used for locking or coordination.
+
+    Attributes:
+    - shared_kuzu_lock: Shared kuzu lock logic on/off.
+    - cache_host: Hostname of the cache service.
+    - cache_port: Port number for the cache service.
+    - agentic_lock_expire: Automatic lock expiration time (in seconds).
+    - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
+    """
+
+    caching: bool = False
+    shared_kuzu_lock: bool = False
+    cache_host: str = "localhost"
+    cache_port: int = 6379
+    agentic_lock_expire: int = 240
+    agentic_lock_timeout: int = 300
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow")
+
+    def to_dict(self) -> dict:
+        return {
+            "caching": self.caching,
+            "shared_kuzu_lock": self.shared_kuzu_lock,
+            "cache_host": self.cache_host,
+            "cache_port": self.cache_port,
+            "agentic_lock_expire": self.agentic_lock_expire,
+            "agentic_lock_timeout": self.agentic_lock_timeout,
+        }
+
+
+@lru_cache
+def get_cache_config():
+    return CacheConfig()
diff --git a/cognee/infrastructure/databases/cache/get_cache_engine.py b/cognee/infrastructure/databases/cache/get_cache_engine.py
new file mode 100644
index 000000000..92186f877
--- /dev/null
+++ b/cognee/infrastructure/databases/cache/get_cache_engine.py
@@ -0,0 +1,59 @@
+"""Factory to get the appropriate cache coordination engine (e.g., Redis)."""
+
+from functools import lru_cache
+from cognee.infrastructure.databases.cache.config import get_cache_config
+
+from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+
+config = get_cache_config()
+
+
+@lru_cache
+def create_cache_engine(
+    cache_host: str,
+    cache_port: int,
+    lock_key: str,
+    agentic_lock_expire: int = 240,
+    agentic_lock_timeout: int = 300,
+):
+    """
+    Factory function to instantiate a cache coordination backend (currently Redis).
+
+    Parameters:
+    -----------
+    - cache_host: Hostname or IP of the cache server.
+    - cache_port: Port number to connect to.
+    - lock_key: Identifier used for the locking resource.
+    - agentic_lock_expire: Duration to hold the lock after acquisition.
+    - agentic_lock_timeout: Max time to wait for the lock before failing.
+
+    Returns:
+    --------
+    - CacheDBInterface: An instance of the appropriate cache adapter. TODO: only Redis is supported for now; split this factory once more backends are added.
+    """
+    if config.caching:
+        from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter
+
+        return RedisAdapter(
+            host=cache_host,
+            port=cache_port,
+            lock_name=lock_key,
+            timeout=agentic_lock_expire,
+            blocking_timeout=agentic_lock_timeout,
+        )
+    else:
+        return None
+
+
+def get_cache_engine(lock_key: str) -> CacheDBInterface:
+    """
+    Returns a cache adapter instance using current context configuration. 
+    """
+
+    return create_cache_engine(
+        cache_host=config.cache_host,
+        cache_port=config.cache_port,
+        lock_key=lock_key,
+        agentic_lock_expire=config.agentic_lock_expire,
+        agentic_lock_timeout=config.agentic_lock_timeout,
+    )
diff --git a/cognee/infrastructure/databases/cache/redis/RedisAdapter.py b/cognee/infrastructure/databases/cache/redis/RedisAdapter.py
new file mode 100644
index 000000000..70c8de9bb
--- /dev/null
+++ b/cognee/infrastructure/databases/cache/redis/RedisAdapter.py
@@ -0,0 +1,49 @@
+import redis
+from contextlib import contextmanager
+from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+
+
+class RedisAdapter(CacheDBInterface):
+    def __init__(self, host, port, lock_name, timeout=240, blocking_timeout=300):
+        super().__init__(host, port, lock_name)
+        self.redis = redis.Redis(host=host, port=port)
+        self.timeout = timeout
+        self.blocking_timeout = blocking_timeout
+
+    def acquire_lock(self):
+        """
+        Acquire the Redis lock manually. Raises if acquisition fails.
+        """
+        self.lock = self.redis.lock(
+            name=self.lock_key,
+            timeout=self.timeout,
+            blocking_timeout=self.blocking_timeout,
+        )
+
+        acquired = self.lock.acquire()
+        if not acquired:
+            raise RuntimeError(f"Could not acquire Redis lock: {self.lock_key}")
+
+        return self.lock
+
+    def release_lock(self):
+        """
+        Release the Redis lock manually, if held.
+        """
+        if self.lock:
+            try:
+                self.lock.release()
+                self.lock = None
+            except redis.exceptions.LockError:
+                pass
+
+    @contextmanager
+    def hold_lock(self):
+        """
+        Context manager for acquiring and releasing the Redis lock automatically.
+        """
+        self.acquire_lock()
+        try:
+            yield
+        finally:
+            self.release_lock()
diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py
index 7b772097f..3f0fb0c57 100644
--- a/cognee/infrastructure/databases/graph/kuzu/adapter.py
+++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py
@@ -4,7 +4,7 @@
 import os
 import json
 import asyncio
 import tempfile
-from uuid import UUID
+from uuid import UUID, uuid5, NAMESPACE_OID
 from kuzu import Connection
 from kuzu.database import Database
 from datetime import datetime, timezone
@@ -23,9 +23,14 @@
 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.storage.utils import JSONEncoder
 from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
 from cognee.tasks.temporal_graph.models import Timestamp
+from cognee.infrastructure.databases.cache.config import get_cache_config
 
 logger = get_logger()
 
+cache_config = get_cache_config()
+if cache_config.shared_kuzu_lock:
+    from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
+
 
 class KuzuAdapter(GraphDBInterface):
     """
@@ -39,12 +44,20 @@
     def __init__(self, db_path: str):
         """Initialize Kuzu database connection and schema."""
+        self.open_connections = 0
+        self._is_closed = False
         self.db_path = db_path  # Path for the database directory
         self.db: Optional[Database] = None
         self.connection: Optional[Connection] = None
-        self.executor = ThreadPoolExecutor()
-        self._initialize_connection()
+        if cache_config.shared_kuzu_lock:
+            self.redis_lock = get_cache_engine(
+                lock_key="kuzu-lock-" + str(uuid5(NAMESPACE_OID, db_path))
+            )
+        else:
+            self.executor = ThreadPoolExecutor()
+            self._initialize_connection()
         self.KUZU_ASYNC_LOCK = asyncio.Lock()
+        self._connection_change_lock = asyncio.Lock()
 
     def _initialize_connection(self) -> None:
         """Initialize the Kuzu database 
connection and schema.""" @@ -209,9 +222,13 @@ class KuzuAdapter(GraphDBInterface): params = params or {} def blocking_query(): + lock_acquired = False try: + if cache_config.shared_kuzu_lock: + self.redis_lock.acquire_lock() + lock_acquired = True if not self.connection: - logger.debug("Reconnecting to Kuzu database...") + logger.info("Reconnecting to Kuzu database...") self._initialize_connection() result = self.connection.execute(query, params) @@ -225,12 +242,47 @@ class KuzuAdapter(GraphDBInterface): val = val.as_py() processed_rows.append(val) rows.append(tuple(processed_rows)) + return rows except Exception as e: logger.error(f"Query execution failed: {str(e)}") raise + finally: + if cache_config.shared_kuzu_lock and lock_acquired: + try: + self.close() + finally: + self.redis_lock.release_lock() - return await loop.run_in_executor(self.executor, blocking_query) + if cache_config.shared_kuzu_lock: + async with self._connection_change_lock: + self.open_connections += 1 + logger.info(f"Open connections after open: {self.open_connections}") + try: + result = blocking_query() + finally: + self.open_connections -= 1 + logger.info(f"Open connections after close: {self.open_connections}") + return result + else: + result = await loop.run_in_executor(self.executor, blocking_query) + return result + + def close(self): + if self.connection: + del self.connection + self.connection = None + if self.db: + del self.db + self.db = None + self._is_closed = True + logger.info("Kuzu database closed successfully") + + def reopen(self): + if self._is_closed: + self._is_closed = False + self._initialize_connection() + logger.info("Kuzu database re-opened successfully") @asynccontextmanager async def get_session(self): @@ -1557,44 +1609,6 @@ class KuzuAdapter(GraphDBInterface): logger.error(f"Failed to delete graph data: {e}") raise - async def clear_database(self) -> None: - """ - Clear all data from the database by deleting the database files and reinitializing. - - This method removes all files associated with the database and reinitializes the Kuzu - database structure, ensuring a completely empty state. It handles exceptions that might - occur during file deletions or initializations carefully. - """ - try: - if self.connection: - self.connection = None - if self.db: - self.db.close() - self.db = None - - db_dir = os.path.dirname(self.db_path) - db_name = os.path.basename(self.db_path) - file_storage = get_file_storage(db_dir) - - if await file_storage.file_exists(db_name): - await file_storage.remove_all() - logger.info(f"Deleted Kuzu database files at {self.db_path}") - - # Reinitialize the database - self._initialize_connection() - # Verify the database is empty - result = self.connection.execute("MATCH (n:Node) RETURN COUNT(n)") - count = result.get_next()[0] if result.has_next() else 0 - if count > 0: - logger.warning( - f"Database still contains {count} nodes after clearing, forcing deletion" - ) - self.connection.execute("MATCH (n:Node) DETACH DELETE n") - logger.info("Database cleared successfully") - except Exception as e: - logger.error(f"Error during database clearing: {e}") - raise - async def get_document_subgraph(self, data_id: str): """ Get all nodes that should be deleted when removing a document. 
diff --git a/cognee/tests/subprocesses/reader.py b/cognee/tests/subprocesses/reader.py new file mode 100644 index 000000000..df54a63e4 --- /dev/null +++ b/cognee/tests/subprocesses/reader.py @@ -0,0 +1,25 @@ +import asyncio +import time +from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter + +# This will create the test.db if it doesn't exist + + +async def main(): + adapter = KuzuAdapter("test.db") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result[0][0]} nodes") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result[0][0]} nodes") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result[0][0]} nodes") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result[0][0]} nodes") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result} nodes") + result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)") + print(f"Reader: Found {result[0][0]} nodes") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/cognee/tests/subprocesses/simple_cognify_1.py b/cognee/tests/subprocesses/simple_cognify_1.py new file mode 100644 index 000000000..cf4d65c88 --- /dev/null +++ b/cognee/tests/subprocesses/simple_cognify_1.py @@ -0,0 +1,31 @@ +import asyncio +import cognee +from cognee.shared.logging_utils import setup_logging, INFO +from cognee.api.v1.search import SearchType + + +async def main(): + await cognee.cognify(datasets=["first_cognify_dataset"]) + + query_text = ( + "Tell me what is in the context. Additionally write out 'FIRST_COGNIFY' before your answer" + ) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=query_text, + datasets=["first_cognify_dataset"], + ) + + print("Search results:") + for result_text in search_results: + print(result_text) + + +if __name__ == "__main__": + logger = setup_logging(log_level=INFO) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/cognee/tests/subprocesses/simple_cognify_2.py b/cognee/tests/subprocesses/simple_cognify_2.py new file mode 100644 index 000000000..6de5035ec --- /dev/null +++ b/cognee/tests/subprocesses/simple_cognify_2.py @@ -0,0 +1,31 @@ +import asyncio +import cognee +from cognee.shared.logging_utils import setup_logging, INFO +from cognee.api.v1.search import SearchType + + +async def main(): + await cognee.cognify(datasets=["second_cognify_dataset"]) + + query_text = ( + "Tell me what is in the context. 
Additionally write out 'SECOND_COGNIFY' before your answer" + ) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=query_text, + datasets=["second_cognify_dataset"], + ) + + print("Search results:") + for result_text in search_results: + print(result_text) + + +if __name__ == "__main__": + logger = setup_logging(log_level=INFO) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/cognee/tests/subprocesses/writer.py b/cognee/tests/subprocesses/writer.py new file mode 100644 index 000000000..27d00caba --- /dev/null +++ b/cognee/tests/subprocesses/writer.py @@ -0,0 +1,32 @@ +import asyncio +import time +import uuid +from cognee.modules.data.processing.document_types import PdfDocument +from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter + + +def create_node(name): + document = PdfDocument( + id=uuid.uuid4(), + name=name, + raw_data_location=name, + external_metadata="test_external_metadata", + mime_type="test_mime", + ) + return document + + +async def main(): + adapter = KuzuAdapter("test.db") + nodes = [create_node(f"Node{i}") for i in range(5)] + + print("Writer: Starting...") + await adapter.add_nodes(nodes) + + print("writer finished...") + + time.sleep(10) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/cognee/tests/test_concurrent_subprocess_access.py b/cognee/tests/test_concurrent_subprocess_access.py new file mode 100644 index 000000000..de03ed254 --- /dev/null +++ b/cognee/tests/test_concurrent_subprocess_access.py @@ -0,0 +1,84 @@ +import os +import asyncio +import cognee +import pathlib +import subprocess + +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + +""" +Test: Redis-based Kùzu Locking Across Subprocesses + +This test ensures the Redis shared lock correctly serializes access to the Kùzu +database when multiple subprocesses (writer/reader and cognify tasks) run in parallel. +If this test fails, it indicates the locking mechanism is not properly handling +concurrent subprocess access. 
+""" + + +async def concurrent_subprocess_access(): + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/concurrent_tasks") + ).resolve() + ) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/concurrent_tasks") + ).resolve() + ) + + subprocess_directory_path = str( + pathlib.Path(os.path.join(pathlib.Path(__file__).parent, "subprocesses/")).resolve() + ) + + writer_path = subprocess_directory_path + "/writer.py" + reader_path = subprocess_directory_path + "/reader.py" + + cognee.config.data_root_directory(data_directory_path) + cognee.config.system_root_directory(cognee_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + writer_process = subprocess.Popen([os.sys.executable, str(writer_path)]) + + reader_process = subprocess.Popen([os.sys.executable, str(reader_path)]) + + # Wait for both processes to complete + writer_process.wait() + reader_process.wait() + + logger.info("Basic write read subprocess example finished") + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + text = """ + This is the text of the first cognify subprocess + """ + await cognee.add(text, dataset_name="first_cognify_dataset") + + text = """ + This is the text of the second cognify subprocess + """ + await cognee.add(text, dataset_name="second_cognify_dataset") + + first_cognify_path = subprocess_directory_path + "/simple_cognify_1.py" + second_cognify_path = subprocess_directory_path + "/simple_cognify_2.py" + + first_cognify_process = subprocess.Popen([os.sys.executable, str(first_cognify_path)]) + + second_cognify_process = subprocess.Popen([os.sys.executable, str(second_cognify_path)]) + + # Wait for both processes to complete + first_cognify_process.wait() + second_cognify_process.wait() + + logger.info("Database concurrent subprocess example finished") + + +if __name__ == "__main__": + asyncio.run(concurrent_subprocess_access()) diff --git a/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py b/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py new file mode 100644 index 000000000..cb5086a70 --- /dev/null +++ b/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py @@ -0,0 +1,87 @@ +"""Tests for cache configuration.""" + +import pytest +from cognee.infrastructure.databases.cache.config import CacheConfig, get_cache_config + + +def test_cache_config_defaults(): + """Test that CacheConfig has the correct default values.""" + config = CacheConfig() + + assert config.caching is False + assert config.shared_kuzu_lock is False + assert config.cache_host == "localhost" + assert config.cache_port == 6379 + assert config.agentic_lock_expire == 240 + assert config.agentic_lock_timeout == 300 + + +def test_cache_config_custom_values(): + """Test that CacheConfig accepts custom values.""" + config = CacheConfig( + caching=True, + shared_kuzu_lock=True, + cache_host="redis.example.com", + cache_port=6380, + agentic_lock_expire=120, + agentic_lock_timeout=180, + ) + + assert config.caching is True + assert config.shared_kuzu_lock is True + assert config.cache_host == "redis.example.com" + assert config.cache_port == 6380 + assert config.agentic_lock_expire == 120 + assert config.agentic_lock_timeout == 180 + + +def test_cache_config_to_dict(): + """Test the to_dict method returns all configuration values.""" + config = CacheConfig( + caching=True, + shared_kuzu_lock=True, + 
cache_host="test-host", + cache_port=7000, + agentic_lock_expire=100, + agentic_lock_timeout=200, + ) + + config_dict = config.to_dict() + + assert config_dict == { + "caching": True, + "shared_kuzu_lock": True, + "cache_host": "test-host", + "cache_port": 7000, + "agentic_lock_expire": 100, + "agentic_lock_timeout": 200, + } + + +def test_get_cache_config_singleton(): + """Test that get_cache_config returns the same instance.""" + config1 = get_cache_config() + config2 = get_cache_config() + + assert config1 is config2 + + +def test_cache_config_extra_fields_allowed(): + """Test that CacheConfig allows extra fields due to extra='allow'.""" + config = CacheConfig(extra_field="extra_value", another_field=123) + + assert hasattr(config, "extra_field") + assert config.extra_field == "extra_value" + assert hasattr(config, "another_field") + assert config.another_field == 123 + + +def test_cache_config_boolean_type_validation(): + """Test that boolean fields accept various truthy/falsy values.""" + config1 = CacheConfig(caching="true", shared_kuzu_lock="yes") + assert config1.caching is True + assert config1.shared_kuzu_lock is True + + config2 = CacheConfig(caching="false", shared_kuzu_lock="no") + assert config2.caching is False + assert config2.shared_kuzu_lock is False diff --git a/docker-compose.yml b/docker-compose.yml index 9f0e199a0..43d9b2607 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -129,6 +129,30 @@ services: networks: - cognee-network + redis: + image: redis:7-alpine + container_name: redis + profiles: + - redis + ports: + - "6379:6379" + networks: + - cognee-network + volumes: + - redis_data:/data + command: [ "redis-server", "--appendonly", "yes" ] + + + redisinsight: + image: redislabs/redisinsight:latest + container_name: redisinsight + restart: always + ports: + - "5540:5540" + networks: + - cognee-network + + networks: cognee-network: name: cognee-network @@ -136,3 +160,4 @@ networks: volumes: chromadb_data: postgres_data: + redis_data: diff --git a/poetry.lock b/poetry.lock index 120de4f7f..62ae7be8d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
[[package]] name = "accelerate" @@ -7,7 +7,7 @@ description = "Accelerate" optional = true python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "accelerate-1.10.1-py3-none-any.whl", hash = "sha256:3621cff60b9a27ce798857ece05e2b9f56fcc71631cfb31ccf71f0359c311f11"}, {file = "accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8"}, @@ -315,7 +315,7 @@ description = "ANTLR 4.9.3 runtime for Python 3.7" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docling\" or extra == \"docs\"" files = [ {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, ] @@ -354,6 +354,35 @@ files = [ {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, ] +[[package]] +name = "apscheduler" +version = "3.11.0" +description = "In-process task scheduler with Cron-like capabilities" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da"}, + {file = "apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133"}, +] + +[package.dependencies] +tzlocal = ">=3.0" + +[package.extras] +doc = ["packaging", "sphinx", "sphinx-rtd-theme (>=1.3.0)"] +etcd = ["etcd3", "protobuf (<=3.21.0)"] +gevent = ["gevent"] +mongodb = ["pymongo (>=3.0)"] +redis = ["redis (>=3.0)"] +rethinkdb = ["rethinkdb (>=2.4.0)"] +sqlalchemy = ["sqlalchemy (>=1.4)"] +test = ["APScheduler[etcd,mongodb,redis,rethinkdb,sqlalchemy,tornado,zookeeper]", "PySide6 ; platform_python_implementation == \"CPython\" and python_version < \"3.14\"", "anyio (>=4.5.2)", "gevent ; python_version < \"3.14\"", "pytest", "pytz", "twisted ; python_version < \"3.14\""] +tornado = ["tornado (>=4.3)"] +twisted = ["twisted"] +zookeeper = ["kazoo"] + [[package]] name = "argon2-cffi" version = "23.1.0" @@ -510,7 +539,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" or python_version == \"3.10\"" +markers = "extra == \"redis\" and python_full_version < \"3.11.3\" or python_version == \"3.10\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -768,7 +797,7 @@ description = "Screen-scraping library" optional = true python-versions = ">=3.7.0" groups = ["main"] -markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\"" +markers = "extra == \"scraping\" or extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\" or extra == \"docling\"" files = [ {file = "beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a"}, {file = "beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695"}, @@ -878,7 +907,7 @@ description = "Extensible memoizing collections and decorators" optional = true 
python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -1203,7 +1232,7 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main"] -markers = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or os_name == \"nt\" or sys_platform == \"win32\")" +markers = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -1227,6 +1256,25 @@ humanfriendly = ">=9.1" [package.extras] cron = ["capturer (>=2.4)"] +[[package]] +name = "colorlog" +version = "6.9.0" +description = "Add colours to the output of Python's logging module." +optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff"}, + {file = "colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} + +[package.extras] +development = ["black", "flake8", "mypy", "pytest", "types-colorama"] + [[package]] name = "comm" version = "0.2.3" @@ -1862,7 +1910,7 @@ description = "serialize all of Python" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"dev\"" +markers = "extra == \"dev\" or extra == \"docling\"" files = [ {file = "dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049"}, {file = "dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0"}, @@ -2022,6 +2070,167 @@ idna = ["idna (>=3.10)"] trio = ["trio (>=0.30)"] wmi = ["wmi (>=1.5.1) ; platform_system == \"Windows\""] +[[package]] +name = "docling" +version = "2.56.1" +description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." 
+optional = true +python-versions = "<4.0,>=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "docling-2.56.1-py3-none-any.whl", hash = "sha256:9c84eb4f5a78cd8a3c88f833157154b4e86c8c853b433e5a8ae811f69081938e"}, + {file = "docling-2.56.1.tar.gz", hash = "sha256:94cdededd8617b3b164ac42ad1c8d73271507b21aecbbaeb16a05b009b0550f5"}, +] + +[package.dependencies] +accelerate = ">=1.0.0,<2" +beautifulsoup4 = ">=4.12.3,<5.0.0" +certifi = ">=2024.7.4" +docling-core = {version = ">=2.48.2,<3.0.0", extras = ["chunking"]} +docling-ibm-models = ">=3.9.1,<4" +docling-parse = ">=4.4.0,<5.0.0" +filetype = ">=1.2.0,<2.0.0" +huggingface_hub = ">=0.23,<1" +lxml = ">=4.0.0,<6.0.0" +marko = ">=2.1.2,<3.0.0" +ocrmac = {version = ">=1.0.0,<2.0.0", markers = "sys_platform == \"darwin\""} +openpyxl = ">=3.1.5,<4.0.0" +pandas = ">=2.1.4,<3.0.0" +pillow = ">=10.0.0,<12.0.0" +pluggy = ">=1.0.0,<2.0.0" +polyfactory = ">=2.22.2" +pydantic = ">=2.0.0,<3.0.0" +pydantic-settings = ">=2.3.0,<3.0.0" +pylatexenc = ">=2.10,<3.0" +pypdfium2 = ">=4.30.0,<4.30.1 || >4.30.1,<5.0.0" +python-docx = ">=1.1.2,<2.0.0" +python-pptx = ">=1.0.2,<2.0.0" +rapidocr = {version = ">=3.3,<4.0.0", markers = "python_version < \"3.14\""} +requests = ">=2.32.2,<3.0.0" +rtree = ">=1.3.0,<2.0.0" +scipy = ">=1.6.0,<2.0.0" +tqdm = ">=4.65.0,<5.0.0" +typer = ">=0.12.5,<0.20.0" + +[package.extras] +asr = ["openai-whisper (>=20250625)"] +easyocr = ["easyocr (>=1.7,<2.0)"] +ocrmac = ["ocrmac (>=1.0.0,<2.0.0) ; sys_platform == \"darwin\""] +rapidocr = ["onnxruntime (>=1.7.0,<2.0.0)", "rapidocr (>=3.3,<4.0.0) ; python_version < \"3.14\""] +tesserocr = ["tesserocr (>=2.7.1,<3.0.0)"] +vlm = ["accelerate (>=1.2.1,<2.0.0)", "mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= \"3.10\" and sys_platform == \"darwin\" and platform_machine == \"arm64\"", "qwen-vl-utils (>=0.0.11)", "transformers (>=4.46.0,<5.0.0)", "vllm (>=0.10.0,<1.0.0) ; python_version >= \"3.10\" and sys_platform == \"linux\" and platform_machine == \"x86_64\""] + +[[package]] +name = "docling-core" +version = "2.48.4" +description = "A python library to define and validate data types in Docling." 
+optional = true +python-versions = "<4.0,>=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "docling_core-2.48.4-py3-none-any.whl", hash = "sha256:367675c1165d0934ae498fa57ca2d27ef0468aad74dc44a5ab061f5d87882ea1"}, + {file = "docling_core-2.48.4.tar.gz", hash = "sha256:d87ce3021cdae3d073ce7572a2396b69be3cde82ebf9a74d4bad1e1cdfdfd524"}, +] + +[package.dependencies] +jsonref = ">=1.1.0,<2.0.0" +jsonschema = ">=4.16.0,<5.0.0" +latex2mathml = ">=3.77.0,<4.0.0" +pandas = ">=2.1.4,<3.0.0" +pillow = ">=10.0.0,<12.0.0" +pydantic = ">=2.6.0,<2.10.0 || >2.10.0,<2.10.1 || >2.10.1,<2.10.2 || >2.10.2,<3.0.0" +pyyaml = ">=5.1,<7.0.0" +semchunk = {version = ">=2.2.0,<3.0.0", optional = true, markers = "extra == \"chunking\""} +tabulate = ">=0.9.0,<0.10.0" +transformers = {version = ">=4.34.0,<5.0.0", optional = true, markers = "extra == \"chunking\""} +typer = ">=0.12.5,<0.20.0" +typing-extensions = ">=4.12.2,<5.0.0" + +[package.extras] +chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"] +chunking-openai = ["semchunk", "tiktoken (>=0.9.0,<0.10.0)"] + +[[package]] +name = "docling-ibm-models" +version = "3.9.1" +description = "This package contains the AI models used by the Docling PDF conversion package" +optional = true +python-versions = "<4.0,>=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "docling_ibm_models-3.9.1-py3-none-any.whl", hash = "sha256:f2d845703877a3ca8853b57775eb8e88a7a9503d4fa110500a2550b8d63d0098"}, + {file = "docling_ibm_models-3.9.1.tar.gz", hash = "sha256:ac6cd1c2be93437cbb5c1f1a1a4030792a38859a1655b14f25cbc8aec760c351"}, +] + +[package.dependencies] +accelerate = ">=1.2.1,<2.0.0" +docling-core = ">=2.19.0,<3.0.0" +huggingface_hub = ">=0.23,<1" +jsonlines = ">=3.1.0,<4.0.0" +numpy = ">=1.24.4,<3.0.0" +opencv-python-headless = ">=4.6.0.66,<5.0.0.0" +Pillow = ">=10.0.0,<12.0.0" +pydantic = ">=2.0.0,<3.0.0" +rtree = ">=1.0.0" +safetensors = {version = ">=0.4.3,<1", extras = ["torch"]} +torch = ">=2.2.2,<3.0.0" +torchvision = ">=0,<1" +tqdm = ">=4.64.0,<5.0.0" +transformers = ">=4.42.0,<5.0.0" + +[[package]] +name = "docling-parse" +version = "4.5.0" +description = "Simple package to extract text with coordinates from programmatic PDFs" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "docling_parse-4.5.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:52df1c5bbafe5199c090bf47eb802c2fe40173fb438200f9a7cbe401aa1eed74"}, + {file = "docling_parse-4.5.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:99e353ab01ac5c81318b67f42c4fc83ac4a0b5b4783bc566f19656204acf45f0"}, + {file = "docling_parse-4.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9223485df491432f5549dd4566c6649ff32f54370701a004673e27e6fa94a9e"}, + {file = "docling_parse-4.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ae6a7f0139d48b9ce8e0a7c43be003e6fa9382919a7efa76153bd1cdbb5e21"}, + {file = "docling_parse-4.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:8beb4f2c79c676b93ab3a14f86586adb51c3d5a2e3c1a902186e4cd6ed0a2e45"}, + {file = "docling_parse-4.5.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:f830409eb96b063ae9f3f4e676f760b0d9738bcb0708ba6b840b7e0c84c490bd"}, + {file = "docling_parse-4.5.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0a1a5f3e2f11ea74ab28d9c04b9391fa4b929c4af045c16bfb0da1e377646e54"}, + {file = 
"docling_parse-4.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee02646e7a158c9f67d8df0052b544f1240d3c28eefa4658603931c13eac4435"}, + {file = "docling_parse-4.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c49193988b56133149584fed70b176de85c95fe698849b2acf68fde9df3a93e5"}, + {file = "docling_parse-4.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:256019969f1edc08b051a90fe739430593aaf7cd59fb18a2e00745f18533ce43"}, + {file = "docling_parse-4.5.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:d0ea05741721a76cfca6559d7cac283f2b2953915745b439be0ca8557864bb33"}, + {file = "docling_parse-4.5.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a5f0bcdd6c84acc3f3a4c1f0fb96be7e9cff7a0bdff85f2f13caa80d2a9fac8f"}, + {file = "docling_parse-4.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c8906d076219a18f4f86b1fec4e4cc3699460e78c88a5731ead48dfbb71835a"}, + {file = "docling_parse-4.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84186662e4780375de28b1bcb18112b04bd8e6aedb787d96544cc0d687f9629"}, + {file = "docling_parse-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:5688fe4281dac16e807496c0b19587e25c53a9542d12f36b3a8fb2e66de78eb2"}, + {file = "docling_parse-4.5.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:d8b2a25262a09e956516c4439ae143a66a55212f0ef9945928159caf1346408f"}, + {file = "docling_parse-4.5.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:368ebdb22ec03aa29b25d2684e51c74f6e167ab6809cd7bb5bb5b97cfe21bf8c"}, + {file = "docling_parse-4.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c9e8954118331438eb8da6058da0e3caf12735b47a86af9521e44465bbb2d4"}, + {file = "docling_parse-4.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24360a0985a8f76ff99c39e533d208bb57427caf96b9ceb585090cd10558f87a"}, + {file = "docling_parse-4.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:c3dba06a3cb8797587c90f5aa10cc2c51803d8f5cd67342ea948288a30503868"}, + {file = "docling_parse-4.5.0-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:217fe2466ca2723bdecbdb162ca73891c1746ec15b8d99ec203f8df3305091a5"}, + {file = "docling_parse-4.5.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:e8b283a93860cdf43a93296e1721e25daeb8eede14417b9f188f0f52c010d6b5"}, + {file = "docling_parse-4.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:affdecc41ed18f1a82c56edac2b815535e3cc07e2b0f8ffaee7e4adfb1333f0e"}, + {file = "docling_parse-4.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da6e535463bcb19a64f3099bb73b299e1f6f49a1ef3b0b3ea4fa62e2790ad875"}, + {file = "docling_parse-4.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:dac5e9907cd6fd020bc1620082dacb9b99bfc9ee4001c55c4e4ce156edf3b617"}, + {file = "docling_parse-4.5.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f983d65703a165b76775c3e4b2a5cade4757216eb88faf5c0c86a9b33f38549a"}, + {file = "docling_parse-4.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:9d02c43d3185f5f4a6d5aaad38e69e07bbd1f965fd62f331bd9dfc006a637604"}, + {file = "docling_parse-4.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9bf94bc213bedd6d880d94eface2285e9e344da5452a23b3a8d0fedecb5d3ec1"}, + {file = "docling_parse-4.5.0.tar.gz", hash = "sha256:e78f648c3a8af5ddb7dcc30c6c4270e9d3257366396a020ad60657de98bf88f5"}, +] + +[package.dependencies] +docling-core = ">=2.44.1" +pillow = ">=10.0.0,<12.0.0" +pydantic = ">=2.0.0" +pywin32 = {version 
= ">=305", markers = "sys_platform == \"win32\""} +tabulate = ">=0.9.0,<1.0.0" + +[package.extras] +perf-tools = ["pdfplumber (>=0.11.7)", "pymupdf (>=1.26.4)", "pypdfium2 (>=4.30.0)"] + [[package]] name = "docstring-parser" version = "0.17.0" @@ -2111,7 +2320,7 @@ description = "An implementation of lxml.xmlfile for the standard library" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, @@ -2184,21 +2393,20 @@ files = [ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] [[package]] -name = "falkordb" -version = "1.2.0" -description = "Python client for interacting with FalkorDB database" +name = "faker" +version = "37.11.0" +description = "Faker is a Python package that generates fake data for you." optional = true -python-versions = "<4.0,>=3.8" +python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"falkordb\"" +markers = "extra == \"docling\"" files = [ - {file = "falkordb-1.2.0-py3-none-any.whl", hash = "sha256:7572d9cc377735d22efc52fe6fe73c7a435422c827b6ea3ca223a850a77be12e"}, - {file = "falkordb-1.2.0.tar.gz", hash = "sha256:ce57365b86722d538e75aa5d438de67ecd8eb9478da612506d9812cd7f182d0b"}, + {file = "faker-37.11.0-py3-none-any.whl", hash = "sha256:1508d2da94dfd1e0087b36f386126d84f8583b3de19ac18e392a2831a6676c57"}, + {file = "faker-37.11.0.tar.gz", hash = "sha256:22969803849ba0618be8eee2dd01d0d9e2cd3b75e6ff1a291fa9abcdb34da5e6"}, ] [package.dependencies] -python-dateutil = ">=2.9.0,<3.0.0" -redis = ">=5.0.1,<6.0.0" +tzdata = "*" [[package]] name = "fastapi" @@ -2722,28 +2930,6 @@ files = [ {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"}, ] -[[package]] -name = "google-ai-generativelanguage" -version = "0.6.15" -description = "Google Ai Generativelanguage API client library" -optional = true -python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_ai_generativelanguage-0.6.15-py3-none-any.whl", hash = "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c"}, - {file = "google_ai_generativelanguage-0.6.15.tar.gz", hash = "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = [ - {version = ">=1.22.3,<2.0.0dev"}, - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, -] -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" - [[package]] name = "google-api-core" version = "2.25.1" @@ -2751,7 +2937,7 @@ description = "Google API client core library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7"}, {file = 
"google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8"}, @@ -2781,26 +2967,6 @@ grpc = ["grpcio (>=1.33.2,<2.0.0)", "grpcio (>=1.49.1,<2.0.0) ; python_version > grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"] -[[package]] -name = "google-api-python-client" -version = "2.182.0" -description = "Google API Client Library for Python" -optional = true -python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_api_python_client-2.182.0-py3-none-any.whl", hash = "sha256:a9b071036d41a17991d8fbf27bedb61f2888a39ae5696cb5a326bf999b2d5209"}, - {file = "google_api_python_client-2.182.0.tar.gz", hash = "sha256:cb2aa127e33c3a31e89a06f39cf9de982db90a98dee020911b21013afafad35f"}, -] - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0" -google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0" -google-auth-httplib2 = ">=0.2.0,<1.0.0" -httplib2 = ">=0.19.0,<1.0.0" -uritemplate = ">=3.0.1,<5" - [[package]] name = "google-auth" version = "2.40.3" @@ -2808,7 +2974,7 @@ description = "Google Authentication Library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"}, {file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"}, @@ -2829,23 +2995,6 @@ requests = ["requests (>=2.20.0,<3.0.0)"] testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] urllib3 = ["packaging", "urllib3"] -[[package]] -name = "google-auth-httplib2" -version = "0.2.0" -description = "Google Authentication Library: httplib2 transport" -optional = true -python-versions = "*" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, - {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, -] - -[package.dependencies] -google-auth = "*" -httplib2 = ">=0.19.0" - [[package]] name = "google-cloud-vision" version = "3.10.2" @@ -2895,31 +3044,6 @@ websockets = ">=13.0.0,<15.1.0" aiohttp = ["aiohttp (<4.0.0)"] local-tokenizer = ["protobuf", "sentencepiece (>=0.2.0)"] -[[package]] -name = "google-generativeai" -version = "0.8.5" -description = "Google Generative AI High level API client library and tools." 
-optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_generativeai-0.8.5-py3-none-any.whl", hash = "sha256:22b420817fb263f8ed520b33285f45976d5b21e904da32b80d4fd20c055123a2"}, -] - -[package.dependencies] -google-ai-generativelanguage = "0.6.15" -google-api-core = "*" -google-api-python-client = "*" -google-auth = ">=2.15.0" -protobuf = "*" -pydantic = "*" -tqdm = "*" -typing-extensions = "*" - -[package.extras] -dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "pyyaml"] - [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -2927,7 +3051,7 @@ description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -2968,7 +3092,7 @@ description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" +markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"scraping\"" files = [ {file = "greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c"}, {file = "greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590"}, @@ -3077,7 +3201,7 @@ description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "grpcio-1.75.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:1ec9cbaec18d9597c718b1ed452e61748ac0b36ba350d558f9ded1a94cc15ec7"}, {file = "grpcio-1.75.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7ee5ee42bfae8238b66a275f9ebcf6f295724375f2fa6f3b52188008b6380faf"}, @@ -3145,7 +3269,7 @@ description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3"}, {file = "grpcio_status-1.71.2.tar.gz", hash = "sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50"}, @@ -3337,22 +3461,6 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<1.0)"] -[[package]] -name = "httplib2" -version = "0.31.0" -description = "A comprehensive HTTP client library." 
-optional = true -python-versions = ">=3.6" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24"}, - {file = "httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c"}, -] - -[package.dependencies] -pyparsing = ">=3.0.4,<4" - [[package]] name = "httptools" version = "0.6.4" @@ -4020,6 +4128,22 @@ files = [ [package.extras] dev = ["build (==1.2.2.post1)", "coverage (==7.5.4) ; python_version < \"3.9\"", "coverage (==7.8.0) ; python_version >= \"3.9\"", "mypy (==1.14.1) ; python_version < \"3.9\"", "mypy (==1.15.0) ; python_version >= \"3.9\"", "pip (==25.0.1)", "pylint (==3.2.7) ; python_version < \"3.9\"", "pylint (==3.3.6) ; python_version >= \"3.9\"", "ruff (==0.11.2)", "twine (==6.1.0)", "uv (==0.6.11)"] +[[package]] +name = "jsonlines" +version = "3.1.0" +description = "Library with helpers for the jsonlines file format" +optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "jsonlines-3.1.0-py3-none-any.whl", hash = "sha256:632f5e38f93dfcb1ac8c4e09780b92af3a55f38f26e7c47ae85109d420b6ad39"}, + {file = "jsonlines-3.1.0.tar.gz", hash = "sha256:2579cb488d96f815b0eb81629e3e6b0332da0962a18fa3532958f7ba14a5c37f"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + [[package]] name = "jsonpatch" version = "1.33" @@ -4079,6 +4203,19 @@ files = [ {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, ] +[[package]] +name = "jsonref" +version = "1.1.0" +description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python." +optional = true +python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9"}, + {file = "jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552"}, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -4770,6 +4907,19 @@ interegular = ["interegular (>=0.3.1,<0.4.0)"] nearley = ["js2py"] regex = ["regex"] +[[package]] +name = "latex2mathml" +version = "3.78.1" +description = "Pure Python library for LaTeX to MathML conversion" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "latex2mathml-3.78.1-py3-none-any.whl", hash = "sha256:f089b6d75e85b937f99693c93e8c16c0804008672c3dd2a3d25affd36f238100"}, + {file = "latex2mathml-3.78.1.tar.gz", hash = "sha256:f941db80bf41db33f31df87b304e8b588f8166b813b0257c11c98f7a9d0aac71"}, +] + [[package]] name = "limits" version = "4.8.0" @@ -4937,160 +5087,113 @@ dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; pytho [[package]] name = "lxml" -version = "6.0.2" +version = "4.9.4" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
optional = true -python-versions = ">=3.8" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"scraping\" or extra == \"docs\" or extra == \"docling\"" files = [ - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"}, - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"}, - {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"}, - {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"}, - {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = 
"sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"}, - {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"}, - {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"}, - {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"}, - {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"}, - {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"}, - {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"}, - {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"}, - {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"}, - {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = 
"sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"}, - {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"}, - {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"}, - {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = 
"sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"}, - {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"}, - {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"}, - {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"}, - {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"}, - {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"}, - {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"}, - {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"}, - {file = 
"lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"}, - {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"}, - {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"}, - {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"}, - {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"}, + {file = "lxml-4.9.4-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e214025e23db238805a600f1f37bf9f9a15413c7bf5f9d6ae194f84980c78722"}, + {file = "lxml-4.9.4-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ec53a09aee61d45e7dbe7e91252ff0491b6b5fee3d85b2d45b173d8ab453efc1"}, + {file = "lxml-4.9.4-cp27-cp27m-win32.whl", hash = "sha256:7d1d6c9e74c70ddf524e3c09d9dc0522aba9370708c2cb58680ea40174800013"}, + {file = "lxml-4.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:cb53669442895763e61df5c995f0e8361b61662f26c1b04ee82899c2789c8f69"}, + {file = "lxml-4.9.4-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:647bfe88b1997d7ae8d45dabc7c868d8cb0c8412a6e730a7651050b8c7289cf2"}, + {file = "lxml-4.9.4-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4d973729ce04784906a19108054e1fd476bc85279a403ea1a72fdb051c76fa48"}, + {file = "lxml-4.9.4-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:056a17eaaf3da87a05523472ae84246f87ac2f29a53306466c22e60282e54ff8"}, + {file = "lxml-4.9.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:aaa5c173a26960fe67daa69aa93d6d6a1cd714a6eb13802d4e4bd1d24a530644"}, + {file = "lxml-4.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:647459b23594f370c1c01768edaa0ba0959afc39caeeb793b43158bb9bb6a663"}, + {file = "lxml-4.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:bdd9abccd0927673cffe601d2c6cdad1c9321bf3437a2f507d6b037ef91ea307"}, + {file = "lxml-4.9.4-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:00e91573183ad273e242db5585b52670eddf92bacad095ce25c1e682da14ed91"}, + {file = "lxml-4.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a602ed9bd2c7d85bd58592c28e101bd9ff9c718fbde06545a70945ffd5d11868"}, + {file = "lxml-4.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de362ac8bc962408ad8fae28f3967ce1a262b5d63ab8cefb42662566737f1dc7"}, + {file = "lxml-4.9.4-cp310-cp310-win32.whl", hash = "sha256:33714fcf5af4ff7e70a49731a7cc8fd9ce910b9ac194f66eaa18c3cc0a4c02be"}, + {file = "lxml-4.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:d3caa09e613ece43ac292fbed513a4bce170681a447d25ffcbc1b647d45a39c5"}, + {file = "lxml-4.9.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:359a8b09d712df27849e0bcb62c6a3404e780b274b0b7e4c39a88826d1926c28"}, + {file = "lxml-4.9.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:43498ea734ccdfb92e1886dfedaebeb81178a241d39a79d5351ba2b671bff2b2"}, + {file = "lxml-4.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4855161013dfb2b762e02b3f4d4a21cc7c6aec13c69e3bffbf5022b3e708dd97"}, + {file = "lxml-4.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c71b5b860c5215fdbaa56f715bc218e45a98477f816b46cfde4a84d25b13274e"}, + {file = "lxml-4.9.4-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9a2b5915c333e4364367140443b59f09feae42184459b913f0f41b9fed55794a"}, + {file = "lxml-4.9.4-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:d82411dbf4d3127b6cde7da0f9373e37ad3a43e89ef374965465928f01c2b979"}, + {file = "lxml-4.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:273473d34462ae6e97c0f4e517bd1bf9588aa67a1d47d93f760a1282640e24ac"}, + {file = "lxml-4.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:389d2b2e543b27962990ab529ac6720c3dded588cc6d0f6557eec153305a3622"}, + {file = "lxml-4.9.4-cp311-cp311-win32.whl", hash = "sha256:8aecb5a7f6f7f8fe9cac0bcadd39efaca8bbf8d1bf242e9f175cbe4c925116c3"}, + {file = "lxml-4.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:c7721a3ef41591341388bb2265395ce522aba52f969d33dacd822da8f018aff8"}, + {file = "lxml-4.9.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:dbcb2dc07308453db428a95a4d03259bd8caea97d7f0776842299f2d00c72fc8"}, + {file = "lxml-4.9.4-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:01bf1df1db327e748dcb152d17389cf6d0a8c5d533ef9bab781e9d5037619229"}, + {file = "lxml-4.9.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e8f9f93a23634cfafbad6e46ad7d09e0f4a25a2400e4a64b1b7b7c0fbaa06d9d"}, + {file = "lxml-4.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3f3f00a9061605725df1816f5713d10cd94636347ed651abdbc75828df302b20"}, + {file = "lxml-4.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:953dd5481bd6252bd480d6ec431f61d7d87fdcbbb71b0d2bdcfc6ae00bb6fb10"}, + {file = "lxml-4.9.4-cp312-cp312-win32.whl", hash = "sha256:266f655d1baff9c47b52f529b5f6bec33f66042f65f7c56adde3fcf2ed62ae8b"}, + {file = "lxml-4.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:f1faee2a831fe249e1bae9cbc68d3cd8a30f7e37851deee4d7962b17c410dd56"}, + {file = "lxml-4.9.4-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:23d891e5bdc12e2e506e7d225d6aa929e0a0368c9916c1fddefab88166e98b20"}, + {file = "lxml-4.9.4-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e96a1788f24d03e8d61679f9881a883ecdf9c445a38f9ae3f3f193ab6c591c66"}, + {file = "lxml-4.9.4-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:5557461f83bb7cc718bc9ee1f7156d50e31747e5b38d79cf40f79ab1447afd2d"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:fdb325b7fba1e2c40b9b1db407f85642e32404131c08480dd652110fc908561b"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d74d4a3c4b8f7a1f676cedf8e84bcc57705a6d7925e6daef7a1e54ae543a197"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ac7674d1638df129d9cb4503d20ffc3922bd463c865ef3cb412f2c926108e9a4"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:ddd92e18b783aeb86ad2132d84a4b795fc5ec612e3545c1b687e7747e66e2b53"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bd9ac6e44f2db368ef8986f3989a4cad3de4cd55dbdda536e253000c801bcc7"}, + {file = "lxml-4.9.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bc354b1393dce46026ab13075f77b30e40b61b1a53e852e99d3cc5dd1af4bc85"}, + {file = "lxml-4.9.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f836f39678cb47c9541f04d8ed4545719dc31ad850bf1832d6b4171e30d65d23"}, + {file = "lxml-4.9.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:9c131447768ed7bc05a02553d939e7f0e807e533441901dd504e217b76307745"}, + {file = "lxml-4.9.4-cp36-cp36m-win32.whl", hash = "sha256:bafa65e3acae612a7799ada439bd202403414ebe23f52e5b17f6ffc2eb98c2be"}, + {file = "lxml-4.9.4-cp36-cp36m-win_amd64.whl", hash = 
"sha256:6197c3f3c0b960ad033b9b7d611db11285bb461fc6b802c1dd50d04ad715c225"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:7b378847a09d6bd46047f5f3599cdc64fcb4cc5a5a2dd0a2af610361fbe77b16"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:1343df4e2e6e51182aad12162b23b0a4b3fd77f17527a78c53f0f23573663545"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6dbdacf5752fbd78ccdb434698230c4f0f95df7dd956d5f205b5ed6911a1367c"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:506becdf2ecaebaf7f7995f776394fcc8bd8a78022772de66677c84fb02dd33d"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca8e44b5ba3edb682ea4e6185b49661fc22b230cf811b9c13963c9f982d1d964"}, + {file = "lxml-4.9.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9d9d5726474cbbef279fd709008f91a49c4f758bec9c062dfbba88eab00e3ff9"}, + {file = "lxml-4.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:bbdd69e20fe2943b51e2841fc1e6a3c1de460d630f65bde12452d8c97209464d"}, + {file = "lxml-4.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8671622256a0859f5089cbe0ce4693c2af407bc053dcc99aadff7f5310b4aa02"}, + {file = "lxml-4.9.4-cp37-cp37m-win32.whl", hash = "sha256:dd4fda67f5faaef4f9ee5383435048ee3e11ad996901225ad7615bc92245bc8e"}, + {file = "lxml-4.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6bee9c2e501d835f91460b2c904bc359f8433e96799f5c2ff20feebd9bb1e590"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:1f10f250430a4caf84115b1e0f23f3615566ca2369d1962f82bef40dd99cd81a"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:3b505f2bbff50d261176e67be24e8909e54b5d9d08b12d4946344066d66b3e43"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1449f9451cd53e0fd0a7ec2ff5ede4686add13ac7a7bfa6988ff6d75cff3ebe2"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4ece9cca4cd1c8ba889bfa67eae7f21d0d1a2e715b4d5045395113361e8c533d"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:59bb5979f9941c61e907ee571732219fa4774d5a18f3fa5ff2df963f5dfaa6bc"}, + {file = "lxml-4.9.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b1980dbcaad634fe78e710c8587383e6e3f61dbe146bcbfd13a9c8ab2d7b1192"}, + {file = "lxml-4.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9ae6c3363261021144121427b1552b29e7b59de9d6a75bf51e03bc072efb3c37"}, + {file = "lxml-4.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bcee502c649fa6351b44bb014b98c09cb00982a475a1912a9881ca28ab4f9cd9"}, + {file = "lxml-4.9.4-cp38-cp38-win32.whl", hash = "sha256:a8edae5253efa75c2fc79a90068fe540b197d1c7ab5803b800fccfe240eed33c"}, + {file = "lxml-4.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:701847a7aaefef121c5c0d855b2affa5f9bd45196ef00266724a80e439220e46"}, + {file = "lxml-4.9.4-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:f610d980e3fccf4394ab3806de6065682982f3d27c12d4ce3ee46a8183d64a6a"}, + {file = "lxml-4.9.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:aa9b5abd07f71b081a33115d9758ef6077924082055005808f68feccb27616bd"}, + {file = 
"lxml-4.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:365005e8b0718ea6d64b374423e870648ab47c3a905356ab6e5a5ff03962b9a9"}, + {file = "lxml-4.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:16b9ec51cc2feab009e800f2c6327338d6ee4e752c76e95a35c4465e80390ccd"}, + {file = "lxml-4.9.4-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a905affe76f1802edcac554e3ccf68188bea16546071d7583fb1b693f9cf756b"}, + {file = "lxml-4.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd814847901df6e8de13ce69b84c31fc9b3fb591224d6762d0b256d510cbf382"}, + {file = "lxml-4.9.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91bbf398ac8bb7d65a5a52127407c05f75a18d7015a270fdd94bbcb04e65d573"}, + {file = "lxml-4.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f99768232f036b4776ce419d3244a04fe83784bce871b16d2c2e984c7fcea847"}, + {file = "lxml-4.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bb5bd6212eb0edfd1e8f254585290ea1dadc3687dd8fd5e2fd9a87c31915cdab"}, + {file = "lxml-4.9.4-cp39-cp39-win32.whl", hash = "sha256:88f7c383071981c74ec1998ba9b437659e4fd02a3c4a4d3efc16774eb108d0ec"}, + {file = "lxml-4.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:936e8880cc00f839aa4173f94466a8406a96ddce814651075f95837316369899"}, + {file = "lxml-4.9.4-pp310-pypy310_pp73-macosx_11_0_x86_64.whl", hash = "sha256:f6c35b2f87c004270fa2e703b872fcc984d714d430b305145c39d53074e1ffe0"}, + {file = "lxml-4.9.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:606d445feeb0856c2b424405236a01c71af7c97e5fe42fbc778634faef2b47e4"}, + {file = "lxml-4.9.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1bdcbebd4e13446a14de4dd1825f1e778e099f17f79718b4aeaf2403624b0f7"}, + {file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0a08c89b23117049ba171bf51d2f9c5f3abf507d65d016d6e0fa2f37e18c0fc5"}, + {file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:232fd30903d3123be4c435fb5159938c6225ee8607b635a4d3fca847003134ba"}, + {file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:231142459d32779b209aa4b4d460b175cadd604fed856f25c1571a9d78114771"}, + {file = "lxml-4.9.4-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:520486f27f1d4ce9654154b4494cf9307b495527f3a2908ad4cb48e4f7ed7ef7"}, + {file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:562778586949be7e0d7435fcb24aca4810913771f845d99145a6cee64d5b67ca"}, + {file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a9e7c6d89c77bb2770c9491d988f26a4b161d05c8ca58f63fb1f1b6b9a74be45"}, + {file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:786d6b57026e7e04d184313c1359ac3d68002c33e4b1042ca58c362f1d09ff58"}, + {file = "lxml-4.9.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95ae6c5a196e2f239150aa4a479967351df7f44800c93e5a975ec726fef005e2"}, + {file = "lxml-4.9.4-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:9b556596c49fa1232b0fff4b0e69b9d4083a502e60e404b44341e2f8fb7187f5"}, + {file = "lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:cc02c06e9e320869d7d1bd323df6dd4281e78ac2e7f8526835d3d48c69060683"}, + {file = 
"lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:857d6565f9aa3464764c2cb6a2e3c2e75e1970e877c188f4aeae45954a314e0c"}, + {file = "lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c42ae7e010d7d6bc51875d768110c10e8a59494855c3d4c348b068f5fb81fdcd"}, + {file = "lxml-4.9.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f10250bb190fb0742e3e1958dd5c100524c2cc5096c67c8da51233f7448dc137"}, + {file = "lxml-4.9.4.tar.gz", hash = "sha256:b1541e50b78e15fa06a2670157a1962ef06591d4c998b998047fff5e3236880e"}, ] [package.extras] cssselect = ["cssselect (>=0.7)"] -html-clean = ["lxml_html_clean"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] +source = ["Cython (==0.29.37)"] [[package]] name = "madoka" @@ -5177,6 +5280,24 @@ profiling = ["gprof2dot"] rtd = ["ipykernel", "jupyter_sphinx", "mdit-py-plugins (>=0.5.0)", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme (>=1.0,<2.0)", "sphinx-copybutton", "sphinx-design"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "requests"] +[[package]] +name = "marko" +version = "2.2.1" +description = "A markdown parser with high extensibility." +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "marko-2.2.1-py3-none-any.whl", hash = "sha256:31e9a18b35c113e506ace5594716fa3df2872f8955908e279bc551f3eb1f0db8"}, + {file = "marko-2.2.1.tar.gz", hash = "sha256:e29d7e071a3b0cb2f7cc4c500d55f893dc5a45d85a8298dde6cb4e4dffd794d3"}, +] + +[package.extras] +codehilite = ["pygments"] +repr = ["objprint"] +toc = ["python-slugify"] + [[package]] name = "markupsafe" version = "3.0.2" @@ -5893,6 +6014,34 @@ files = [ {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, ] +[[package]] +name = "mpire" +version = "2.10.2" +description = "A Python package for easy multiprocessing, but faster than multiprocessing" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "mpire-2.10.2-py3-none-any.whl", hash = "sha256:d627707f7a8d02aa4c7f7d59de399dec5290945ddf7fbd36cbb1d6ebb37a51fb"}, + {file = "mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97"}, +] + +[package.dependencies] +multiprocess = [ + {version = "*", optional = true, markers = "python_version < \"3.11\" and extra == \"dill\""}, + {version = ">=0.70.15", optional = true, markers = "python_version >= \"3.11\" and extra == \"dill\""}, +] +pygments = ">=2.0" +pywin32 = {version = ">=301", markers = "platform_system == \"Windows\""} +tqdm = ">=4.27" + +[package.extras] +dashboard = ["flask"] +dill = ["multiprocess (>=0.70.15) ; python_version >= \"3.11\"", "multiprocess ; python_version < \"3.11\""] +docs = ["docutils (==0.17.1)", "sphinx (==3.2.1)", "sphinx-autodoc-typehints (==1.11.0)", "sphinx-rtd-theme (==0.5.0)", "sphinx-versions (==1.0.1)", "sphinxcontrib-images (==0.9.2)"] +testing = ["ipywidgets", "multiprocess (>=0.70.15) ; python_version >= \"3.11\"", "multiprocess ; python_version < \"3.11\"", "numpy", "pywin32 (>=301) ; platform_system == \"Windows\"", "rich"] + [[package]] name = "mpmath" version = "1.3.0" @@ -6051,6 +6200,39 @@ files = [ [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} +[[package]] +name = "multiprocess" +version = "0.70.18" +description = "better multiprocessing and multithreading in Python" +optional = 
true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "multiprocess-0.70.18-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25d4012dcaaf66b9e8e955f58482b42910c2ee526d532844d8bcf661bbc604df"}, + {file = "multiprocess-0.70.18-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:06b19433de0d02afe5869aec8931dd5c01d99074664f806c73896b0d9e527213"}, + {file = "multiprocess-0.70.18-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6fa1366f994373aaf2d4738b0f56e707caeaa05486e97a7f71ee0853823180c2"}, + {file = "multiprocess-0.70.18-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b8940ae30139e04b076da6c5b83e9398585ebdf0f2ad3250673fef5b2ff06d6"}, + {file = "multiprocess-0.70.18-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0929ba95831adb938edbd5fb801ac45e705ecad9d100b3e653946b7716cb6bd3"}, + {file = "multiprocess-0.70.18-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4d77f8e4bfe6c6e2e661925bbf9aed4d5ade9a1c6502d5dfc10129b9d1141797"}, + {file = "multiprocess-0.70.18-pp38-pypy38_pp73-macosx_10_9_arm64.whl", hash = "sha256:2dbaae9bffa1fb2d58077c0044ffe87a8c8974e90fcf778cdf90e139c970d42a"}, + {file = "multiprocess-0.70.18-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bcac5a4e81f1554d98d1bba963eeb1bd24966432f04fcbd29b6e1a16251ad712"}, + {file = "multiprocess-0.70.18-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c0c7cd75d0987ab6166d64e654787c781dbacbcbcaaede4c1ffe664720b3e14b"}, + {file = "multiprocess-0.70.18-pp39-pypy39_pp73-macosx_10_13_arm64.whl", hash = "sha256:9fd8d662f7524a95a1be7cbea271f0b33089fe792baabec17d93103d368907da"}, + {file = "multiprocess-0.70.18-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:3fbba48bfcd932747c33f0b152b26207c4e0840c35cab359afaff7a8672b1031"}, + {file = "multiprocess-0.70.18-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5f9be0342e597dde86152c10442c5fb6c07994b1c29de441b7a3a08b0e6be2a0"}, + {file = "multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea"}, + {file = "multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d"}, + {file = "multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2"}, + {file = "multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334"}, + {file = "multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b"}, + {file = "multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8"}, + {file = "multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d"}, +] + +[package.dependencies] +dill = ">=0.4.0" + [[package]] name = "mypy" version = "1.18.2" @@ -6536,7 +6718,7 @@ description = "CUBLAS native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = 
"sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"}, {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"}, @@ -6550,7 +6732,7 @@ description = "CUDA profiling tools runtime libs." optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"}, {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"}, @@ -6564,7 +6746,7 @@ description = "NVRTC native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"}, {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"}, @@ -6578,7 +6760,7 @@ description = "CUDA Runtime native Libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"}, {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"}, @@ -6592,7 +6774,7 @@ description = "cuDNN runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"}, {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"}, @@ -6609,7 +6791,7 @@ description = "CUFFT native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"}, {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"}, @@ -6626,7 +6808,7 @@ description = "cuFile GPUDirect libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"}, {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"}, @@ -6639,7 +6821,7 @@ description = "CURAND native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"}, {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"}, @@ -6653,7 +6835,7 @@ description = "CUDA solver native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"}, {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"}, @@ -6672,7 +6854,7 @@ description = "CUSPARSE native runtime libraries" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"}, {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"}, @@ -6689,7 +6871,7 @@ description = "NVIDIA cuSPARSELt" optional = true python-versions = "*" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"}, {file = 
"nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"}, @@ -6703,7 +6885,7 @@ description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f"}, {file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039"}, @@ -6716,7 +6898,7 @@ description = "Nvidia JIT LTO Library" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"}, {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"}, @@ -6730,7 +6912,7 @@ description = "NVIDIA Tools Extension" optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"}, {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"}, @@ -6755,6 +6937,24 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "ocrmac" +version = "1.0.0" +description = "A python wrapper to extract text from images on a mac system. Uses the vision framework from Apple." 
+optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "ocrmac-1.0.0-py2.py3-none-any.whl", hash = "sha256:0b5a072aa23a9ead48132cb2d595b680aa6c3c5a6cb69525155e35ca95610c3a"}, + {file = "ocrmac-1.0.0.tar.gz", hash = "sha256:5b299e9030c973d1f60f82db000d6c2e5ff271601878c7db0885e850597d1d2e"}, +] + +[package.dependencies] +Click = ">=7.0" +pillow = "*" +pyobjc-framework-Vision = "*" + [[package]] name = "olefile" version = "0.47" @@ -6795,7 +6995,7 @@ description = "A flexible configuration library" optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docling\" or extra == \"docs\"" files = [ {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, @@ -6930,7 +7130,7 @@ description = "Wrapper package for OpenCV python bindings." optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docling\" or extra == \"docs\"" files = [ {file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"}, {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"}, @@ -6949,6 +7149,32 @@ numpy = [ {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] +[[package]] +name = "opencv-python-headless" +version = "4.11.0.86" +description = "Wrapper package for OpenCV python bindings." +optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:a66c1b286a9de872c343ee7c3553b084244299714ebb50fbdcd76f07ebbe6c81"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6efabcaa9df731f29e5ea9051776715b1bdd1845d7c9530065c7951d2a2899eb"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e0a27c19dd1f40ddff94976cfe43066fbbe9dfbb2ec1907d66c19caef42a57b"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:f447d8acbb0b6f2808da71fddd29c1cdd448d2bc98f72d9bb78a7a898fc9621b"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:6c304df9caa7a6a5710b91709dd4786bf20a74d57672b3c31f7033cc638174ca"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -6956,7 +7182,7 @@ description = "A Python library to read/write Excel 2010 xlsx/xlsm files" optional = true python-versions = ">=3.8" groups = 
["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, @@ -7314,7 +7540,7 @@ description = "Powerful data structures for data analysis, time series, and stat optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"docs\" or extra == \"evals\"" +markers = "extra == \"docs\" or extra == \"evals\" or extra == \"docling\"" files = [ {file = "pandas-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52bc29a946304c360561974c6542d1dd628ddafa69134a7131fdfd6a5d7a1a35"}, {file = "pandas-2.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:220cc5c35ffaa764dd5bb17cf42df283b5cb7fdf49e10a7b053a06c9cb48ee2b"}, @@ -7887,6 +8113,29 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.14.1)"] +[[package]] +name = "playwright" +version = "1.55.0" +description = "A high-level API to automate web browsers" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "playwright-1.55.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:d7da108a95001e412effca4f7610de79da1637ccdf670b1ae3fdc08b9694c034"}, + {file = "playwright-1.55.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8290cf27a5d542e2682ac274da423941f879d07b001f6575a5a3a257b1d4ba1c"}, + {file = "playwright-1.55.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:25b0d6b3fd991c315cca33c802cf617d52980108ab8431e3e1d37b5de755c10e"}, + {file = "playwright-1.55.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c6d4d8f6f8c66c483b0835569c7f0caa03230820af8e500c181c93509c92d831"}, + {file = "playwright-1.55.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a0777c4ce1273acf90c87e4ae2fe0130182100d99bcd2ae5bf486093044838"}, + {file = "playwright-1.55.0-py3-none-win32.whl", hash = "sha256:29e6d1558ad9d5b5c19cbec0a72f6a2e35e6353cd9f262e22148685b86759f90"}, + {file = "playwright-1.55.0-py3-none-win_amd64.whl", hash = "sha256:7eb5956473ca1951abb51537e6a0da55257bb2e25fc37c2b75af094a5c93736c"}, + {file = "playwright-1.55.0-py3-none-win_arm64.whl", hash = "sha256:012dc89ccdcbd774cdde8aeee14c08e0dd52ddb9135bf10e9db040527386bd76"}, +] + +[package.dependencies] +greenlet = ">=3.1.1,<4.0.0" +pyee = ">=13,<14" + [[package]] name = "plotly" version = "6.3.0" @@ -7919,7 +8168,7 @@ description = "plugin and hook calling mechanisms for python" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\"" +markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\"" files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, @@ -7942,6 +8191,32 @@ files = [ {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] +[[package]] +name = "polyfactory" +version = "2.22.2" +description = "Mock data generation factories" +optional = true +python-versions = 
"<4.0,>=3.8" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "polyfactory-2.22.2-py3-none-any.whl", hash = "sha256:9bea58ac9a80375b4153cd60820f75e558b863e567e058794d28c6a52b84118a"}, + {file = "polyfactory-2.22.2.tar.gz", hash = "sha256:a3297aa0b004f2b26341e903795565ae88507c4d86e68b132c2622969028587a"}, +] + +[package.dependencies] +faker = ">=5.0.0" +typing-extensions = ">=4.6.0" + +[package.extras] +attrs = ["attrs (>=22.2.0)"] +beanie = ["beanie", "pydantic[email]", "pymongo (<4.9)"] +full = ["attrs", "beanie", "msgspec", "odmantic", "pydantic", "sqlalchemy"] +msgspec = ["msgspec"] +odmantic = ["odmantic (<1.0.0)", "pydantic[email]"] +pydantic = ["pydantic[email] (>=1.10)"] +sqlalchemy = ["sqlalchemy (>=1.4.29)"] + [[package]] name = "pondpond" version = "1.4.1" @@ -8165,6 +8440,19 @@ files = [ {file = "propcache-0.3.2.tar.gz", hash = "sha256:20d7d62e4e7ef05f221e0db2856b979540686342e7dd9973b815599c7057e168"}, ] +[[package]] +name = "protego" +version = "0.5.0" +description = "Pure-Python robots.txt parser with support for modern conventions" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "protego-0.5.0-py3-none-any.whl", hash = "sha256:4237227840a67fdeec289a9b89652455b5657806388c17e1a556e160435f8fc5"}, + {file = "protego-0.5.0.tar.gz", hash = "sha256:225dee0acfcc71de8c6f7cef9c618e5a9d3e7baa7ae1470b8d076a064033c463"}, +] + [[package]] name = "proto-plus" version = "1.26.1" @@ -8172,7 +8460,7 @@ description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -8212,7 +8500,7 @@ description = "Cross-platform lib for process and system monitoring." 
optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\"" +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"docling\"" files = [ {file = "psutil-7.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:76168cef4397494250e9f4e73eb3752b146de1dd950040b29186d0cce1d5ca13"}, {file = "psutil-7.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:5d007560c8c372efdff9e4579c2846d71de737e4605f611437255e81efcca2c5"}, @@ -8244,6 +8532,7 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, + {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -8515,7 +8804,7 @@ description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -8528,7 +8817,7 @@ description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -8537,6 +8826,67 @@ files = [ [package.dependencies] pyasn1 = ">=0.6.1,<0.7.0" +[[package]] +name = "pyclipper" +version = "1.3.0.post6" +description = "Cython wrapper for the C++ translation of the Angus Johnson's Clipper library (ver. 
6.4.2)" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "pyclipper-1.3.0.post6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fa0f5e78cfa8262277bb3d0225537b3c2a90ef68fd90a229d5d24cf49955dcf4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a01f182d8938c1dc515e8508ed2442f7eebd2c25c7d5cb29281f583c1a8008a4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:640f20975727994d4abacd07396f564e9e5665ba5cb66ceb36b300c281f84fa4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63002f6bb0f1efa87c0b81634cbb571066f237067e23707dabf746306c92ba5"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-win32.whl", hash = "sha256:106b8622cd9fb07d80cbf9b1d752334c55839203bae962376a8c59087788af26"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-win_amd64.whl", hash = "sha256:9699e98862dadefd0bea2360c31fa61ca553c660cbf6fb44993acde1b959f58f"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4247e7c44b34c87acbf38f99d48fb1acaf5da4a2cf4dcd601a9b24d431be4ef"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:851b3e58106c62a5534a1201295fe20c21714dee2eda68081b37ddb0367e6caa"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16cc1705a915896d2aff52131c427df02265631279eac849ebda766432714cc0"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace1f0753cf71c5c5f6488b8feef5dd0fa8b976ad86b24bb51f708f513df4aac"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win32.whl", hash = "sha256:dbc828641667142751b1127fd5c4291663490cf05689c85be4c5bcc89aaa236a"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win_amd64.whl", hash = "sha256:1c03f1ae43b18ee07730c3c774cc3cf88a10c12a4b097239b33365ec24a0a14a"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6363b9d79ba1b5d8f32d1623e797c1e9f994600943402e68d5266067bdde173e"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32cd7fb9c1c893eb87f82a072dbb5e26224ea7cebbad9dc306d67e1ac62dd229"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3aab10e3c10ed8fa60c608fb87c040089b83325c937f98f06450cf9fcfdaf1d"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58eae2ff92a8cae1331568df076c4c5775bf946afab0068b217f0cf8e188eb3c"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win32.whl", hash = "sha256:793b0aa54b914257aa7dc76b793dd4dcfb3c84011d48df7e41ba02b571616eaf"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win_amd64.whl", hash = "sha256:d3f9da96f83b8892504923beb21a481cd4516c19be1d39eb57a92ef1c9a29548"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f129284d2c7bcd213d11c0f35e1ae506a1144ce4954e9d1734d63b120b0a1b58"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:188fbfd1d30d02247f92c25ce856f5f3c75d841251f43367dbcf10935bc48f38"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6d129d0c2587f2f5904d201a4021f859afbb45fada4261c9fdedb2205b09d23"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash 
= "sha256:5c9c80b5c46eef38ba3f12dd818dc87f5f2a0853ba914b6f91b133232315f526"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win32.whl", hash = "sha256:b15113ec4fc423b58e9ae80aa95cf5a0802f02d8f02a98a46af3d7d66ff0cc0e"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win_amd64.whl", hash = "sha256:e5ff68fa770ac654c7974fc78792978796f068bd274e95930c0691c31e192889"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c92e41301a8f25f9adcd90954512038ed5f774a2b8c04a4a9db261b78ff75e3a"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04214d23cf79f4ddcde36e299dea9f23f07abb88fa47ef399bf0e819438bbefd"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:aa604f8665ade434f9eafcd23f89435057d5d09427dfb4554c5e6d19f6d8aa1a"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-win32.whl", hash = "sha256:1fd56855ca92fa7eb0d8a71cf3a24b80b9724c8adcc89b385bbaa8924e620156"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-win_amd64.whl", hash = "sha256:6893f9b701f3132d86018594d99b724200b937a3a3ddfe1be0432c4ff0284e6e"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2737df106b8487103916147fe30f887aff439d9f2bd2f67c9d9b5c13eac88ccf"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ab72260f144693e1f7735e93276c3031e1ed243a207eff1f8b98c7162ba22c"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:491ec1bfd2ee3013269c2b652dde14a85539480e0fb82f89bb12198fa59fff82"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-win32.whl", hash = "sha256:2e257009030815853528ba4b2ef7fb7e172683a3f4255a63f00bde34cfab8b58"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-win_amd64.whl", hash = "sha256:ed6e50c6e87ed190141573615d54118869bd63e9cd91ca5660d2ca926bf25110"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:cf0a535cfa02b207435928e991c60389671fe1ea1dfae79170973f82f52335b2"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:48dd55fbd55f63902cad511432ec332368cbbbc1dd2110c0c6c1e9edd735713a"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05ae2ea878fdfa31dd375326f6191b03de98a9602cc9c2b6d4ff960b20a974c"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:903176952a159c4195b8be55e597978e24804c838c7a9b12024c39704d341f72"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-win32.whl", hash = "sha256:fb1e52cf4ee0a9fa8b2254ed589cc51b0c989efc58fa8804289aca94a21253f7"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-win_amd64.whl", hash = "sha256:9cbdc517e75e647aa9bf6e356b3a3d2e3af344f82af38e36031eb46ba0ab5425"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:383f3433b968f2e4b0843f338c1f63b85392b6e1d936de722e8c5d4f577dbff5"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cf5ca2b9358d30a395ac6e14b3154a9fd1f9b557ad7153ea15cf697e88d07ce1"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3404dfcb3415eee863564b5f49be28a8c7fb99ad5e31c986bcc33c8d47d97df7"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:aa0e7268f8ceba218964bc3a482a5e9d32e352e8c3538b03f69a6b3db979078d"}, + {file = 
"pyclipper-1.3.0.post6-cp39-cp39-win32.whl", hash = "sha256:47a214f201ff930595a30649c2a063f78baa3a8f52e1f38da19f7930c90ed80c"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-win_amd64.whl", hash = "sha256:28bb590ae79e6beb15794eaee12b6f1d769589572d33e494faf5aa3b1f31b9fa"}, + {file = "pyclipper-1.3.0.post6-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3e5e65176506da6335f6cbab497ae1a29772064467fa69f66de6bab4b6304d34"}, + {file = "pyclipper-1.3.0.post6-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3d58202de8b8da4d1559afbda4e90a8c260a5373672b6d7bc5448c4614385144"}, + {file = "pyclipper-1.3.0.post6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2cd8600bd16d209d5d45a33b45c278e1cc8bedc169af1a1f2187b581c521395"}, + {file = "pyclipper-1.3.0.post6.tar.gz", hash = "sha256:42bff0102fa7a7f2abdd795a2594654d62b786d0c6cd67b72d469114fdeb608c"}, +] + [[package]] name = "pycocotools" version = "2.0.10" @@ -8800,6 +9150,25 @@ gcp-secret-manager = ["google-cloud-secret-manager (>=2.23.1)"] toml = ["tomli (>=2.0.1)"] yaml = ["pyyaml (>=6.0.1)"] +[[package]] +name = "pyee" +version = "13.0.0" +description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"}, + {file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"}, +] + +[package.dependencies] +typing-extensions = "*" + +[package.extras] +dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "mypy", "pytest", "pytest-asyncio ; python_version >= \"3.4\"", "pytest-trio ; python_version >= \"3.7\"", "sphinx", "toml", "tox", "trio", "trio ; python_version > \"3.6\"", "trio-typing ; python_version > \"3.6\"", "twine", "twisted", "validate-pyproject[all]"] + [[package]] name = "pyfiglet" version = "1.0.4" @@ -8876,6 +9245,18 @@ dev = ["pyright", "ruff (==0.4.1)"] tests = ["boto3", "datafusion (==49.0.0)", "datasets", "duckdb", "ml-dtypes", "pandas", "pillow", "polars[pandas,pyarrow]", "psutil", "pytest", "tensorflow (<=2.19.0)", "tqdm"] torch = ["torch"] +[[package]] +name = "pylatexenc" +version = "2.10" +description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3"}, +] + [[package]] name = "pylint" version = "3.3.8" @@ -8942,6 +9323,123 @@ files = [ [package.dependencies] pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} +[[package]] +name = "pyobjc-core" +version = "11.1" +description = "Python<->ObjC Interoperability Module" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "pyobjc_core-11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4c7536f3e94de0a3eae6bb382d75f1219280aa867cdf37beef39d9e7d580173c"}, + {file = "pyobjc_core-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ec36680b5c14e2f73d432b03ba7c1457dc6ca70fa59fd7daea1073f2b4157d33"}, + {file = 
"pyobjc_core-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:765b97dea6b87ec4612b3212258024d8496ea23517c95a1c5f0735f96b7fd529"}, + {file = "pyobjc_core-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:18986f83998fbd5d3f56d8a8428b2f3e0754fd15cef3ef786ca0d29619024f2c"}, + {file = "pyobjc_core-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8849e78cfe6595c4911fbba29683decfb0bf57a350aed8a43316976ba6f659d2"}, + {file = "pyobjc_core-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8cb9ed17a8d84a312a6e8b665dd22393d48336ea1d8277e7ad20c19a38edf731"}, + {file = "pyobjc_core-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:f2455683e807f8541f0d83fbba0f5d9a46128ab0d5cc83ea208f0bec759b7f96"}, + {file = "pyobjc_core-11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4a99e6558b48b8e47c092051e7b3be05df1c8d0617b62f6fa6a316c01902d157"}, + {file = "pyobjc_core-11.1.tar.gz", hash = "sha256:b63d4d90c5df7e762f34739b39cc55bc63dbcf9fb2fb3f2671e528488c7a87fe"}, +] + +[[package]] +name = "pyobjc-framework-cocoa" +version = "11.1" +description = "Wrappers for the Cocoa frameworks on macOS" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "pyobjc_framework_cocoa-11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b27a5bdb3ab6cdeb998443ff3fce194ffae5f518c6a079b832dbafc4426937f9"}, + {file = "pyobjc_framework_cocoa-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7b9a9b8ba07f5bf84866399e3de2aa311ed1c34d5d2788a995bdbe82cc36cfa0"}, + {file = "pyobjc_framework_cocoa-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:806de56f06dfba8f301a244cce289d54877c36b4b19818e3b53150eb7c2424d0"}, + {file = "pyobjc_framework_cocoa-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:54e93e1d9b0fc41c032582a6f0834befe1d418d73893968f3f450281b11603da"}, + {file = "pyobjc_framework_cocoa-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:fd5245ee1997d93e78b72703be1289d75d88ff6490af94462b564892e9266350"}, + {file = "pyobjc_framework_cocoa-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:aede53a1afc5433e1e7d66568cc52acceeb171b0a6005407a42e8e82580b4fc0"}, + {file = "pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71"}, + {file = "pyobjc_framework_cocoa-11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bbee71eeb93b1b31ffbac8560b59a0524a8a4b90846a260d2c4f2188f3d4c721"}, + {file = "pyobjc_framework_cocoa-11.1.tar.gz", hash = "sha256:87df76b9b73e7ca699a828ff112564b59251bb9bbe72e610e670a4dc9940d038"}, +] + +[package.dependencies] +pyobjc-core = ">=11.1" + +[[package]] +name = "pyobjc-framework-coreml" +version = "11.1" +description = "Wrappers for the framework CoreML on macOS" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "pyobjc_framework_coreml-11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b1b1b849ca91e0d62ed6dfd200d95ca8d023d6edff854aae77ba54eb0542415f"}, + {file = "pyobjc_framework_coreml-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b5be7889ad99da1aca040238fd99af9ee87ea8a6628f24d33e2e4890b88dd139"}, + {file = "pyobjc_framework_coreml-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c768b03d72488b964d753392e9c587684961d8237b69cca848b3a5a00aea79c9"}, + {file = 
"pyobjc_framework_coreml-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:10d51f8a5fe8d30c7ec70304a2324df76b48b9fbef30ee0f0c33b99a49ae8853"}, + {file = "pyobjc_framework_coreml-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4df25ee233430f016ffcb4e88506b54c8e7b668c93197e6a1341761530a5922c"}, + {file = "pyobjc_framework_coreml-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:287a2a059016d02d8c40e0d29e70226142a4969db97ad79cefc70ec9bf0ab29e"}, + {file = "pyobjc_framework_coreml-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:a479c3d759aff3695f72c7915a78df6e92e0eca7027abaa8b4a07e876ba1dbfb"}, + {file = "pyobjc_framework_coreml-11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25e6e2185aefc46eb2a796eee6f4bef1cba3206f914b85ac659699468e9dc9a8"}, + {file = "pyobjc_framework_coreml-11.1.tar.gz", hash = "sha256:775923eefb9eac2e389c0821b10564372de8057cea89f1ea1cdaf04996c970a7"}, +] + +[package.dependencies] +pyobjc-core = ">=11.1" +pyobjc-framework-Cocoa = ">=11.1" + +[[package]] +name = "pyobjc-framework-quartz" +version = "11.1" +description = "Wrappers for the Quartz frameworks on macOS" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "pyobjc_framework_quartz-11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b5ef75c416b0209e25b2eb07a27bd7eedf14a8c6b2f968711969d45ceceb0f84"}, + {file = "pyobjc_framework_quartz-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2d501fe95ef15d8acf587cb7dc4ab4be3c5a84e2252017da8dbb7df1bbe7a72a"}, + {file = "pyobjc_framework_quartz-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9ac806067541917d6119b98d90390a6944e7d9bd737f5c0a79884202327c9204"}, + {file = "pyobjc_framework_quartz-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43a1138280571bbf44df27a7eef519184b5c4183a588598ebaaeb887b9e73e76"}, + {file = "pyobjc_framework_quartz-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b23d81c30c564adf6336e00b357f355b35aad10075dd7e837cfd52a9912863e5"}, + {file = "pyobjc_framework_quartz-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:07cbda78b4a8fcf3a2d96e047a2ff01f44e3e1820f46f0f4b3b6d77ff6ece07c"}, + {file = "pyobjc_framework_quartz-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:39d02a3df4b5e3eee1e0da0fb150259476910d2a9aa638ab94153c24317a9561"}, + {file = "pyobjc_framework_quartz-11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9b1f451ddb5243d8d6316af55f240a02b0fffbfe165bff325628bf73f3df7f44"}, + {file = "pyobjc_framework_quartz-11.1.tar.gz", hash = "sha256:a57f35ccfc22ad48c87c5932818e583777ff7276605fef6afad0ac0741169f75"}, +] + +[package.dependencies] +pyobjc-core = ">=11.1" +pyobjc-framework-Cocoa = ">=11.1" + +[[package]] +name = "pyobjc-framework-vision" +version = "11.1" +description = "Wrappers for the framework Vision on macOS" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "sys_platform == \"darwin\" and extra == \"docling\"" +files = [ + {file = "pyobjc_framework_vision-11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c6f46df632096f070e16ba902a483fcb95c01fe12856a071bc2b25ac4a89bf3"}, + {file = "pyobjc_framework_vision-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bfbde43c9d4296e1d26548b6d30ae413e2029425968cd8bce96d3c5a735e8f2c"}, + {file = "pyobjc_framework_vision-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:df076c3e3e672887182953efc934c1f9683304737e792ec09a29bfee90d2e26a"}, + {file = "pyobjc_framework_vision-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1e5617e37dd2a7cff5e69e9aab039ea74b39ccdc528f6c828f2b60c1254e61e5"}, + {file = "pyobjc_framework_vision-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:dfd148a6df30ac70a9c41dd90a6c8f8c7f339bd9ca6829629a902f272e02b6b4"}, + {file = "pyobjc_framework_vision-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d1f8fdccc6135fdbfd66d8f21240d6c84465cb8e116a8e5b43601aed020051e5"}, + {file = "pyobjc_framework_vision-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d00830c71a30fc893b3c5ee65119c7e5e5a95a16af53b8e56a0e58cff57e3b56"}, + {file = "pyobjc_framework_vision-11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25d2d42edc7459b010ec87a0c5428d12fe5d62dfa95cd34fb71f716f2e4d6b95"}, + {file = "pyobjc_framework_vision-11.1.tar.gz", hash = "sha256:26590512ee7758da3056499062a344b8a351b178be66d4b719327884dde4216b"}, +] + +[package.dependencies] +pyobjc-core = ">=11.1" +pyobjc-framework-Cocoa = ">=11.1" +pyobjc-framework-CoreML = ">=11.1" +pyobjc-framework-Quartz = ">=11.1" + [[package]] name = "pypandoc" version = "1.15" @@ -9000,7 +9498,7 @@ description = "Python bindings to PDFium" optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, @@ -9214,7 +9712,7 @@ description = "Create, read, and update Microsoft Word .docx files." optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7"}, {file = "python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce"}, @@ -9335,7 +9833,7 @@ description = "Create, read, and update PowerPoint 2007+ (.pptx) files." 
optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba"}, {file = "python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095"}, @@ -9354,7 +9852,7 @@ description = "World timezone definitions, modern and historical" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"neo4j\" or extra == \"graphiti\" or extra == \"docs\" or extra == \"evals\" or extra == \"dlt\"" +markers = "extra == \"neo4j\" or extra == \"graphiti\" or extra == \"docs\" or extra == \"evals\" or extra == \"docling\" or extra == \"dlt\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -9367,7 +9865,7 @@ description = "Python for Window Extensions" optional = false python-versions = "*" groups = ["main"] -markers = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or platform_python_implementation != \"PyPy\" or extra == \"dlt\")" +markers = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or platform_python_implementation != \"PyPy\" or extra == \"dlt\" or extra == \"docling\")" files = [ {file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"}, {file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"}, @@ -9700,6 +10198,31 @@ files = [ [package.extras] all = ["numpy"] +[[package]] +name = "rapidocr" +version = "3.4.2" +description = "Awesome OCR Library" +optional = true +python-versions = "<4,>=3.6" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "rapidocr-3.4.2-py3-none-any.whl", hash = "sha256:17845fa8cc9a20a935111e59482f2214598bba1547000cfd960d8924dd4522a5"}, +] + +[package.dependencies] +colorlog = "*" +numpy = ">=1.19.5,<3.0.0" +omegaconf = "*" +opencv-python = ">=4.5.1.48" +Pillow = "*" +pyclipper = ">=1.2.0" +PyYAML = "*" +requests = "*" +Shapely = ">=1.7.1,<2.0.4 || >2.0.4" +six = ">=1.15.0" +tqdm = "*" + [[package]] name = "rdflib" version = "7.1.4" @@ -9730,7 +10253,7 @@ description = "Python client for Redis database and key-value store" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"falkordb\"" +markers = "extra == \"redis\"" files = [ {file = "redis-5.3.1-py3-none-any.whl", hash = "sha256:dc1909bd24669cc31b5f67a039700b16ec30571096c5f1f0d9d2324bff31af97"}, {file = "redis-5.3.1.tar.gz", hash = "sha256:ca49577a531ea64039b5a36db3d6cd1a0c7a60c34124d46924a45b956e8cf14c"}, @@ -10216,7 +10739,7 @@ description = "Pure-Python RSA implementation" optional = true python-versions = "<4,>=3.6" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = 
"sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -10225,6 +10748,26 @@ files = [ [package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "rtree" +version = "1.4.1" +description = "R-Tree spatial index for Python GIS" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "rtree-1.4.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d672184298527522d4914d8ae53bf76982b86ca420b0acde9298a7a87d81d4a4"}, + {file = "rtree-1.4.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a7e48d805e12011c2cf739a29d6a60ae852fb1de9fc84220bbcef67e6e595d7d"}, + {file = "rtree-1.4.1-py3-none-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:efa8c4496e31e9ad58ff6c7df89abceac7022d906cb64a3e18e4fceae6b77f65"}, + {file = "rtree-1.4.1-py3-none-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12de4578f1b3381a93a655846900be4e3d5f4cd5e306b8b00aa77c1121dc7e8c"}, + {file = "rtree-1.4.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b558edda52eca3e6d1ee629042192c65e6b7f2c150d6d6cd207ce82f85be3967"}, + {file = "rtree-1.4.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f155bc8d6bac9dcd383481dee8c130947a4866db1d16cb6dff442329a038a0dc"}, + {file = "rtree-1.4.1-py3-none-win_amd64.whl", hash = "sha256:efe125f416fd27150197ab8521158662943a40f87acab8028a1aac4ad667a489"}, + {file = "rtree-1.4.1-py3-none-win_arm64.whl", hash = "sha256:3d46f55729b28138e897ffef32f7ce93ac335cb67f9120125ad3742a220800f0"}, + {file = "rtree-1.4.1.tar.gz", hash = "sha256:c6b1b3550881e57ebe530cc6cffefc87cd9bf49c30b37b894065a9f810875e46"}, +] + [[package]] name = "ruff" version = "0.13.1" @@ -10306,7 +10849,7 @@ description = "" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\"" +markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docling\" or extra == \"docs\"" files = [ {file = "safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba"}, {file = "safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b"}, @@ -10325,6 +10868,10 @@ files = [ {file = "safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9"}, ] +[package.dependencies] +numpy = {version = ">=1.21.6", optional = true, markers = "extra == \"numpy\""} +torch = {version = ">=1.10", optional = true, markers = "extra == \"torch\""} + [package.extras] all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] dev = ["safetensors[all]"] @@ -10403,7 +10950,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"docs\" or extra == \"evals\")" +markers = "python_version == \"3.10\" and (extra == \"docs\" or extra == \"evals\" or extra == \"docling\")" files = [ {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, @@ -10468,7 
+11015,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.11" groups = ["main"] -markers = "python_version >= \"3.11\" and (extra == \"docs\" or extra == \"evals\")" +markers = "python_version >= \"3.11\" and (extra == \"docs\" or extra == \"evals\" or extra == \"docling\")" files = [ {file = "scipy-1.16.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6ab88ea43a57da1af33292ebd04b417e8e2eaf9d5aa05700be8d6e1b6501cd92"}, {file = "scipy-1.16.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c95e96c7305c96ede73a7389f46ccd6c659c4da5ef1b2789466baeaed3622b6e"}, @@ -10541,6 +11088,23 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest (>=8.0.0)", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +[[package]] +name = "semchunk" +version = "2.2.2" +description = "A fast and lightweight Python library for splitting text into semantically meaningful chunks." +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "semchunk-2.2.2-py3-none-any.whl", hash = "sha256:94ca19020c013c073abdfd06d79a7c13637b91738335f3b8cdb5655ee7cc94d2"}, + {file = "semchunk-2.2.2.tar.gz", hash = "sha256:940e89896e64eeb01de97ba60f51c8c7b96c6a3951dfcf574f25ce2146752f52"}, +] + +[package.dependencies] +mpire = {version = "*", extras = ["dill"]} +tqdm = "*" + [[package]] name = "semver" version = "3.0.4" @@ -10639,7 +11203,7 @@ description = "Easily download, build, install, upgrade, and uninstall Python pa optional = true python-versions = ">=3.9" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\") or python_version == \"3.12\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\") or python_full_version == \"3.13.0\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\") or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\") or python_version == \"3.12\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\" or extra == \"docling\") or python_full_version == \"3.13.0\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\" or extra == \"docling\") or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == 
\"dlt\"" files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, @@ -10654,6 +11218,81 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] +[[package]] +name = "shapely" +version = "2.1.2" +description = "Manipulation and analysis of geometric objects" +optional = true +python-versions = ">=3.10" +groups = ["main"] +markers = "extra == \"docling\"" +files = [ + {file = "shapely-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7ae48c236c0324b4e139bea88a306a04ca630f49be66741b340729d380d8f52f"}, + {file = "shapely-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eba6710407f1daa8e7602c347dfc94adc02205ec27ed956346190d66579eb9ea"}, + {file = "shapely-2.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef4a456cc8b7b3d50ccec29642aa4aeda959e9da2fe9540a92754770d5f0cf1f"}, + {file = "shapely-2.1.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e38a190442aacc67ff9f75ce60aec04893041f16f97d242209106d502486a142"}, + {file = "shapely-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:40d784101f5d06a1fd30b55fc11ea58a61be23f930d934d86f19a180909908a4"}, + {file = "shapely-2.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f6f6cd5819c50d9bcf921882784586aab34a4bd53e7553e175dece6db513a6f0"}, + {file = "shapely-2.1.2-cp310-cp310-win32.whl", hash = "sha256:fe9627c39c59e553c90f5bc3128252cb85dc3b3be8189710666d2f8bc3a5503e"}, + {file = "shapely-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:1d0bfb4b8f661b3b4ec3565fa36c340bfb1cda82087199711f86a88647d26b2f"}, + {file = "shapely-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91121757b0a36c9aac3427a651a7e6567110a4a67c97edf04f8d55d4765f6618"}, + {file = "shapely-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a9c722ba774cf50b5d4541242b4cce05aafd44a015290c82ba8a16931ff63d"}, + {file = "shapely-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cc4f7397459b12c0b196c9efe1f9d7e92463cbba142632b4cc6d8bbbbd3e2b09"}, + {file = "shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:136ab87b17e733e22f0961504d05e77e7be8c9b5a8184f685b4a91a84efe3c26"}, + {file = "shapely-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:16c5d0fc45d3aa0a69074979f4f1928ca2734fb2e0dde8af9611e134e46774e7"}, + {file = "shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2"}, + {file = "shapely-2.1.2-cp311-cp311-win32.whl", hash = "sha256:2fa78b49485391224755a856ed3b3bd91c8455f6121fee0db0e71cefb07d0ef6"}, + {file = "shapely-2.1.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:c64d5c97b2f47e3cd9b712eaced3b061f2b71234b3fc263e0fcf7d889c6559dc"}, + {file = "shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94"}, + {file = "shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359"}, + {file = "shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3"}, + {file = "shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b"}, + {file = "shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc"}, + {file = "shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d"}, + {file = "shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454"}, + {file = "shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179"}, + {file = "shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8"}, + {file = "shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a"}, + {file = "shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e"}, + {file = "shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6"}, + {file = "shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af"}, + {file = "shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd"}, + {file = "shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350"}, + {file = "shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715"}, + {file = "shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40"}, + {file = "shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b"}, + {file = "shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801"}, + {file = "shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0"}, + {file = "shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c"}, + {file = "shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99"}, + {file = "shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf"}, + {file = 
"shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c"}, + {file = "shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223"}, + {file = "shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c"}, + {file = "shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df"}, + {file = "shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf"}, + {file = "shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4"}, + {file = "shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc"}, + {file = "shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566"}, + {file = "shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c"}, + {file = "shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a"}, + {file = "shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076"}, + {file = "shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1"}, + {file = "shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0"}, + {file = "shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26"}, + {file = "shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0"}, + {file = "shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735"}, + {file = "shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9"}, + {file = "shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9"}, +] + +[package.dependencies] +numpy = ">=1.21" + +[package.extras] +docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +test = ["pytest", "pytest-cov", "scipy-doctest"] + [[package]] name = "shellingham" version = "1.5.4" @@ -10851,7 +11490,7 @@ description = "A modern CSS selector implementation for Beautiful Soup." 
optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\"" +markers = "extra == \"scraping\" or extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\" or extra == \"docling\"" files = [ {file = "soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c"}, {file = "soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f"}, @@ -11067,7 +11706,7 @@ description = "Pretty-print tabular data" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"deepeval\"" +markers = "extra == \"deepeval\" or extra == \"docling\"" files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -11076,6 +11715,24 @@ files = [ [package.extras] widechars = ["wcwidth"] +[[package]] +name = "tavily-python" +version = "0.7.12" +description = "Python wrapper for the Tavily API" +optional = true +python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "tavily_python-0.7.12-py3-none-any.whl", hash = "sha256:00d09b9de3ca02ef9a994cf4e7ae43d4ec9d199f0566ba6e52cbfcbd07349bd1"}, + {file = "tavily_python-0.7.12.tar.gz", hash = "sha256:661945bbc9284cdfbe70fb50de3951fd656bfd72e38e352481d333a36ae91f5a"}, +] + +[package.dependencies] +httpx = "*" +requests = "*" +tiktoken = ">=0.5.1" + [[package]] name = "tenacity" version = "9.0.0" @@ -11325,7 +11982,7 @@ description = "Tensors and Dynamic neural networks in Python with strong GPU acc optional = true python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905"}, {file = "torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011"}, @@ -11389,7 +12046,7 @@ description = "image and video datasets and models for torch deep learning" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "torchvision-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7266871daca00ad46d1c073e55d972179d12a58fa5c9adec9a3db9bbed71284a"}, {file = "torchvision-0.23.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:31c583ba27426a3a04eca8c05450524105c1564db41be6632f7536ef405a6de2"}, @@ -11494,7 +12151,7 @@ description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow optional = true python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\"" +markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docling\" or extra == \"docs\"" files = [ {file = "transformers-4.56.2-py3-none-any.whl", hash = "sha256:79c03d0e85b26cb573c109ff9eafa96f3c8d4febfd8a0774e8bba32702dd6dde"}, {file = "transformers-4.56.2.tar.gz", hash = "sha256:5e7c623e2d7494105c726dd10f6f90c2c99a55ebe86eef7233765abd0cb1c529"}, @@ -11636,7 +12293,7 @@ description = "A language and compiler for 
custom Deep Learning operations" optional = true python-versions = "<3.14,>=3.9" groups = ["main"] -markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\") and platform_system == \"Linux\"" files = [ {file = "triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128"}, {file = "triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467"}, @@ -11785,12 +12442,31 @@ description = "Provider of IANA time zone data" optional = true python-versions = ">=2" groups = ["main"] -markers = "extra == \"docs\" or extra == \"evals\" or extra == \"dlt\"" +markers = "(platform_system == \"Windows\" or extra == \"docs\" or extra == \"evals\" or extra == \"docling\" or extra == \"dlt\") and (extra == \"scraping\" or extra == \"docs\" or extra == \"evals\" or extra == \"docling\" or extra == \"dlt\")" files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] +[[package]] +name = "tzlocal" +version = "5.3.1" +description = "tzinfo object for the local timezone" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"scraping\"" +files = [ + {file = "tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d"}, + {file = "tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] + [[package]] name = "unstructured" version = "0.18.15" @@ -11970,19 +12646,6 @@ files = [ [package.extras] dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"] -[[package]] -name = "uritemplate" -version = "4.2.0" -description = "Implementation of RFC 6570 URI Templates" -optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686"}, - {file = "uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e"}, -] - [[package]] name = "urllib3" version = "2.5.0" @@ -12576,7 +13239,7 @@ description = "A Python module for creating Excel XLSX files." 
optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"docs\"" +markers = "extra == \"docs\" or extra == \"docling\"" files = [ {file = "xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3"}, {file = "xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c"}, @@ -12846,10 +13509,9 @@ deepeval = ["deepeval"] dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pre-commit", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"] distributed = ["modal"] dlt = ["dlt"] +docling = ["docling", "transformers"] docs = ["unstructured"] evals = ["gdown", "matplotlib", "pandas", "plotly", "scikit-learn"] -falkordb = ["falkordb"] -gemini = ["google-generativeai"] graphiti = ["graphiti-core"] groq = ["groq"] huggingface = ["transformers"] @@ -12864,8 +13526,10 @@ ollama = ["transformers"] postgres = ["asyncpg", "pgvector", "psycopg2"] postgres-binary = ["asyncpg", "pgvector", "psycopg2-binary"] posthog = ["posthog"] +redis = ["redis"] +scraping = ["APScheduler", "beautifulsoup4", "lxml", "playwright", "protego", "tavily-python"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "f56ce018c96211b8a67d74b4b53c3d333dd6aa964d4be4f9844db8710d130144" +content-hash = "8d8172ac8ddc3c30ca79a1677ecf2a28897d52c0a564d8fb5646c8565c313a0f" diff --git a/pyproject.toml b/pyproject.toml index c8f71514b..30889a61e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,6 +140,7 @@ dev = [ "mkdocstrings[python]>=0.26.2,<0.27", ] debug = ["debugpy>=1.8.9,<2.0.0"] +redis = ["redis>=5.0.3,<6.0.0"] monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"] diff --git a/uv.lock b/uv.lock index 8bab2a8e5..4f94bf098 100644 --- a/uv.lock +++ b/uv.lock @@ -1024,6 +1024,9 @@ postgres-binary = [ posthog = [ { name = "posthog" }, ] +redis = [ + { name = "redis" }, +] scraping = [ { name = "apscheduler" }, { name = "beautifulsoup4" }, @@ -1114,6 +1117,7 @@ requires-dist = [ { name = "python-magic-bin", marker = "sys_platform == 'win32'", specifier = "<0.5" }, { name = "python-multipart", specifier = ">=0.0.20,<1.0.0" }, { name = "rdflib", specifier = ">=7.1.4,<7.2.0" }, + { name = "redis", marker = "extra == 'redis'", specifier = ">=5.0.3,<6.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<=0.13.1" }, { name = "s3fs", extras = ["boto3"], marker = "extra == 'aws'", specifier = "==2025.3.2" }, { name = "scikit-learn", marker = "extra == 'evals'", specifier = ">=1.6.1,<2" }, @@ -1134,7 +1138,7 @@ requires-dist = [ { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" }, { name = "websockets", specifier = ">=15.0.1,<16.0.0" }, ] -provides-extras = ["api", "distributed", "scraping", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "monitoring", "docling"] +provides-extras = ["api", "distributed", "scraping", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "redis", "monitoring", "docling"] [[package]] name = "colorama" @@ -7389,6 
+7393,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/31/e9b6f04288dcd3fa60cb3179260d6dad81b92aef3063d679ac7d80a827ea/rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997", size = 565051, upload-time = "2025-03-29T02:22:44.987Z" }, ] +[[package]] +name = "redis" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, + { name = "pyjwt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/cf/128b1b6d7086200c9f387bd4be9b2572a30b90745ef078bd8b235042dc9f/redis-5.3.1.tar.gz", hash = "sha256:ca49577a531ea64039b5a36db3d6cd1a0c7a60c34124d46924a45b956e8cf14c", size = 4626200, upload-time = "2025-07-25T08:06:27.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/26/5c5fa0e83c3621db835cfc1f1d789b37e7fa99ed54423b5f519beb931aa7/redis-5.3.1-py3-none-any.whl", hash = "sha256:dc1909bd24669cc31b5f67a039700b16ec30571096c5f1f0d9d2324bff31af97", size = 272833, upload-time = "2025-07-25T08:06:26.317Z" }, +] + [[package]] name = "referencing" version = "0.36.2" diff --git a/working_dir_error_replication/run_subprocess_test.py b/working_dir_error_replication/run_subprocess_test.py new file mode 100644 index 000000000..b97154a91 --- /dev/null +++ b/working_dir_error_replication/run_subprocess_test.py @@ -0,0 +1,31 @@ +""" +Run writer and reader in separate subprocesses to test Kuzu locks. +""" + +import subprocess +import time +import os + + +def main(): + print("=== Kuzu Subprocess Lock Test ===") + print("Starting writer and reader in separate subprocesses...") + print("Writer will hold the database lock, reader should block or fail\n") + + start_time = time.time() + + # Start writer subprocess + writer_process = subprocess.Popen([os.sys.executable, "writer.py"]) + + reader_process = subprocess.Popen([os.sys.executable, "reader.py"]) + + # Wait for both processes to complete + writer_process.wait() + reader_process.wait() + + total_time = time.time() - start_time + print(f"\nTotal execution time: {total_time:.2f}s") + + +if __name__ == "__main__": + main() From 6a693d319add7eaf7875599e2f965c0da5d43096 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 16 Oct 2025 15:45:21 +0100 Subject: [PATCH 40/61] fix: preferred_loaders is always None in `data_item_to_text_file.load_file()` --- .../tasks/ingestion/data_item_to_text_file.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index 9fcafca57..dc0d1d0a7 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -48,17 +48,17 @@ async def data_item_to_text_file( await pull_from_s3(data_item_path, temp_file) temp_file.flush() # Data needs to be saved to local storage loader = get_loader_engine() - return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader( - temp_file.name, preferred_loaders - ) + return await loader.load_file( + temp_file.name, None, preferred_loaders + ), loader.get_loader(temp_file.name, preferred_loaders) # data is local file path elif parsed_url.scheme == "file": if settings.accept_local_file_path: loader = get_loader_engine() - return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( - data_item_path, preferred_loaders - ) + return await loader.load_file( + data_item_path, 
None, preferred_loaders + ), loader.get_loader(data_item_path, preferred_loaders) else: raise IngestionError(message="Local files are not accepted.") @@ -69,9 +69,9 @@ async def data_item_to_text_file( # Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path) if settings.accept_local_file_path: loader = get_loader_engine() - return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( - data_item_path, preferred_loaders - ) + return await loader.load_file( + data_item_path, None, preferred_loaders + ), loader.get_loader(data_item_path, preferred_loaders) else: raise IngestionError(message="Local files are not accepted.") From 2998802c00961e36115bed93f5eda446e8500c75 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 17 Oct 2025 11:58:14 +0200 Subject: [PATCH 41/61] fix: Resolve issue with wrong error for OpenAI --- .../litellm_instructor/llm/openai/adapter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 8877c2bdf..305b426b8 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -156,10 +156,7 @@ class OpenAIAdapter(LLMInterface): InstructorRetryException, ) as e: if not (self.fallback_model and self.fallback_api_key): - raise ContentPolicyFilterError( - f"The provided input contains content that is not aligned with our content policy: {text_input}" - ) from e - + raise e try: return await self.aclient.chat.completions.create( model=self.fallback_model, From 3ee50c192f0b3469858e5caf4992e8cfd8901d36 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 12:01:06 +0100 Subject: [PATCH 42/61] refactor emptiness check to be boolean, and optimize query --- cognee/api/v1/search/search.py | 10 ++++------ .../databases/graph/graph_db_interface.py | 6 +++--- .../databases/graph/kuzu/adapter.py | 7 ++++--- .../databases/graph/neo4j_driver/adapter.py | 7 ++++--- cognee/modules/data/exceptions/__init__.py | 1 - cognee/modules/data/exceptions/exceptions.py | 10 ---------- cognee/tests/test_kuzu.py | 16 ++++++++-------- cognee/tests/test_neo4j.py | 16 ++++++++-------- 8 files changed, 31 insertions(+), 42 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 880a57b99..0caca619a 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -8,7 +8,7 @@ from cognee.modules.search.types import SearchResult, SearchType, CombinedSearch from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets -from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError +from cognee.modules.data.exceptions import DatasetNotFoundError async def search( @@ -177,12 +177,10 @@ async def search( raise DatasetNotFoundError(message="No datasets found.") graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - if nodes_count == 0: - raise SearchOnEmptyGraphError( - message="Knowledge graph is empty, please ensure data is added and cognified." 
- ) + if is_empty: + return [] filtered_search_results = await search_function( query_text=query_text, diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index d7542eac6..67df1a27c 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -160,9 +160,9 @@ class GraphDBInterface(ABC): """ @abstractmethod - async def count_nodes(self) -> int: - logger.warning("count_nodes is not implemented") - return 1 # dummy value to not fail search() + async def is_empty(self) -> bool: + logger.warning("is_empty() is not implemented") + return True @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 04c163efa..29ff92247 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,13 +185,14 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") - async def count_nodes(self) -> int: + async def is_empty(self) -> bool: query = """ MATCH (n) - RETURN COUNT(n); + RETURN true + LIMIT 1; """ query_result = await self.query(query) - return query_result[0][0] + return len(query_result) == 0 async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: """ diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index ac19069f4..5861b69cb 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,13 +87,14 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield session - async def count_nodes(self) -> int: + async def is_empty(self) -> bool: query = """ + RETURN EXISTS { MATCH (n) - RETURN COUNT(n) as total_nodes; + } AS node_exists; """ query_result = await self.query(query) - return query_result[0]["total_nodes"] + return not query_result[0]["node_exists"] @deadlock_retry() async def query( diff --git a/cognee/modules/data/exceptions/__init__.py b/cognee/modules/data/exceptions/__init__.py index ba943634d..54af81070 100644 --- a/cognee/modules/data/exceptions/__init__.py +++ b/cognee/modules/data/exceptions/__init__.py @@ -9,5 +9,4 @@ from .exceptions import ( UnauthorizedDataAccessError, DatasetNotFoundError, DatasetTypeError, - SearchOnEmptyGraphError, ) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index c2921750a..ac3b68e64 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -35,16 +35,6 @@ class DatasetNotFoundError(CogneeValidationError): super().__init__(message, name, status_code) -class SearchOnEmptyGraphError(CogneeValidationError): - def __init__( - self, - message: str = "Knowledge graph is empty, please ensure data is added and cognified.", - name: str = "SearchOnEmptyGraphError", - status_code=status.HTTP_400_BAD_REQUEST, - ): - super().__init__(message, name, status_code) - - class DatasetTypeError(CogneeValidationError): def __init__( self, diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index c07a51104..fe9da6dcb 100644 --- 
a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -51,21 +51,21 @@ async def main(): graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database is not empty" + assert is_empty, "Kuzu graph database is not empty" await cognee.add([explanation_file_path_quantum], dataset_name) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database should be empty before cognify" + assert is_empty, "Kuzu graph database should be empty before cognify" await cognee.cognify([dataset_name]) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count != 0, "Kuzu graph database should not be empty" + assert not is_empty, "Kuzu graph database should not be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -131,9 +131,9 @@ async def main(): await cognee.prune.prune_system(metadata=True) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database is not empty" + assert is_empty, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 6f1fcf975..925614e67 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -39,9 +39,9 @@ async def main(): graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Graph has to be empty" + assert is_empty, "Graph has to be empty" await cognee.add([explanation_file_path_nlp], dataset_name) @@ -50,15 +50,15 @@ async def main(): ) await cognee.add([explanation_file_path_quantum], dataset_name) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Graph has to be empty before cognify" + assert is_empty, "Graph has to be empty before cognify" await cognee.cognify([dataset_name]) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count != 0, "Graph shouldn't be empty" + assert not is_empty, "Graph shouldn't be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -132,8 +132,8 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - nodes_count = await graph_engine.count_nodes() - assert nodes_count == 0, "Neo4j graph database is not empty" + is_empty = await graph_engine.is_empty() + assert is_empty, "Neo4j graph database is not empty" if __name__ == "__main__": From c313fcd02924eff3a08a8129b3b3b14f93f67ca0 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 12:06:35 +0100 Subject: [PATCH 43/61] log warning on attempts to search on an empty knowledge graph --- cognee/api/v1/search/search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 0caca619a..9f158e9d0 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -9,6 +9,9 @@ from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets from 
cognee.modules.data.exceptions import DatasetNotFoundError +from cognee.shared.logging_utils import get_logger + +logger = get_logger() async def search( @@ -180,6 +183,7 @@ async def search( is_empty = await graph_engine.is_empty() if is_empty: + logger.warning("Search attempt on an empty knowledge graph") return [] filtered_search_results = await search_function( From 4e2a7778600bcea3992dbec4466939022e9b53c8 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 14:18:44 +0100 Subject: [PATCH 44/61] tests: update tests after last refactoring --- cognee/tests/unit/api/test_search.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py index aff9e5d38..54a4cc35f 100644 --- a/cognee/tests/unit/api/test_search.py +++ b/cognee/tests/unit/api/test_search.py @@ -1,6 +1,5 @@ import pytest import cognee -from cognee.modules.data.exceptions import SearchOnEmptyGraphError @pytest.mark.asyncio @@ -8,16 +7,15 @@ async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) await cognee.add("Sample input") - with pytest.raises(SearchOnEmptyGraphError): - await cognee.search("Sample query") + result = await cognee.search("Sample query") + assert result == [] +@pytest.mark.asyncio async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) await cognee.add("Sample input") await cognee.cognify() - try: - await cognee.search("Sample query") - except SearchOnEmptyGraphError: - pytest.fail("Should not raise SearchOnEmptyGraphError when data was added and cognified") + result = await cognee.search("Sample query") + assert result != [] From 50aa8aac115f8fcf4011e1001e86adf9afc89594 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 17:33:25 +0100 Subject: [PATCH 45/61] refactor: remove `filestream` arg from `LoaderEngine.load_file(...)` --- cognee/infrastructure/loaders/LoaderEngine.py | 1 - .../tasks/ingestion/data_item_to_text_file.py | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cognee/infrastructure/loaders/LoaderEngine.py b/cognee/infrastructure/loaders/LoaderEngine.py index 87a008660..6b62f7641 100644 --- a/cognee/infrastructure/loaders/LoaderEngine.py +++ b/cognee/infrastructure/loaders/LoaderEngine.py @@ -105,7 +105,6 @@ class LoaderEngine: async def load_file( self, file_path: str, - file_stream: Optional[Any], preferred_loaders: Optional[List[str]] = None, **kwargs, ): diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index dc0d1d0a7..9fcafca57 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -48,17 +48,17 @@ async def data_item_to_text_file( await pull_from_s3(data_item_path, temp_file) temp_file.flush() # Data needs to be saved to local storage loader = get_loader_engine() - return await loader.load_file( - temp_file.name, None, preferred_loaders - ), loader.get_loader(temp_file.name, preferred_loaders) + return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader( + temp_file.name, preferred_loaders + ) # data is local file path elif parsed_url.scheme == "file": if settings.accept_local_file_path: loader = get_loader_engine() - return await loader.load_file( - data_item_path, None, 
preferred_loaders - ), loader.get_loader(data_item_path, preferred_loaders) + return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( + data_item_path, preferred_loaders + ) else: raise IngestionError(message="Local files are not accepted.") @@ -69,9 +69,9 @@ async def data_item_to_text_file( # Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path) if settings.accept_local_file_path: loader = get_loader_engine() - return await loader.load_file( - data_item_path, None, preferred_loaders - ), loader.get_loader(data_item_path, preferred_loaders) + return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( + data_item_path, preferred_loaders + ) else: raise IngestionError(message="Local files are not accepted.") From 3f7efd8b888829d3e89a8120e4345782495ed3af Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 13:33:02 +0200 Subject: [PATCH 46/61] added fixes for tests --- .github/workflows/test_different_operating_systems.yml | 2 +- .github/workflows/test_suites.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index 00e387ac4..64f1a14f9 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -9,7 +9,7 @@ on: python-versions: required: false type: string - default: '["3.10.x", "3.11.x", "3.12.x"]' + default: '["3.10.x", "3.12.x", "3.13.x"]' secrets: LLM_PROVIDER: required: true diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index 2f1bdebf0..5c1597a93 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -85,7 +85,7 @@ jobs: needs: [basic-tests, e2e-tests] uses: ./.github/workflows/test_different_operating_systems.yml with: - python-versions: '["3.10.x", "3.11.x", "3.12.x"]' + python-versions: '["3.10.x", "3.11.x", "3.12.x", "3.13.x"]' secrets: inherit # Matrix-based vector database tests diff --git a/pyproject.toml b/pyproject.toml index 30889a61e..417786e90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ { name = "Vasilije Markovic" }, { name = "Boris Arzentar" }, ] -requires-python = ">=3.10,<=3.13" +requires-python = ">=3.10,<3.14" readme = "README.md" license = "Apache-2.0" classifiers = [ From 66876daf8581ef27d6fad1c50c17628f9a3f5d03 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 14:38:34 +0200 Subject: [PATCH 47/61] removed docs --- .github/actions/cognee_setup/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 4017d524b..06e5bae6b 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -41,4 +41,4 @@ runs: EXTRA_ARGS="$EXTRA_ARGS --extra $extra" done fi - uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS + uv sync --extra api --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS From a1927548adf0ba4197251d8008daef19cfc4030b Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 14:52:02 +0200 Subject: [PATCH 48/61] added --- .github/actions/cognee_setup/action.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 06e5bae6b..4017d524b 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -41,4 +41,4 @@ runs: EXTRA_ARGS="$EXTRA_ARGS --extra $extra" done fi - uv sync --extra api --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS + uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS diff --git a/pyproject.toml b/pyproject.toml index 417786e90..390028a6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ chromadb = [ "chromadb>=0.6,<0.7", "pypika==0.48.9", ] -docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] +docs = ["lxml=6.0.2, unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] codegraph = [ "fastembed<=0.6.0 ; python_version < '3.13'", "transformers>=4.46.3,<5", From 0c62916e75fac2281a6152ed84a74d476cb11437 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 14:54:00 +0200 Subject: [PATCH 49/61] added --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 390028a6c..0f3c8c287 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ chromadb = [ "chromadb>=0.6,<0.7", "pypika==0.48.9", ] -docs = ["lxml=6.0.2, unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] +docs = ["lxml==6.0.2, unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] codegraph = [ "fastembed<=0.6.0 ; python_version < '3.13'", "transformers>=4.46.3,<5", From 8900b31decbac106ccb4b985c7d90590ad4d87ff Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 14:57:40 +0200 Subject: [PATCH 50/61] added --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0f3c8c287..461aee301 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ chromadb = [ "chromadb>=0.6,<0.7", "pypika==0.48.9", ] -docs = ["lxml==6.0.2, unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] +docs = ["lxml==6.0.2", "unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] codegraph = [ "fastembed<=0.6.0 ; python_version < '3.13'", "transformers>=4.46.3,<5", From aa577d438444fd0e82f892c7b23f5ca2b04c5a65 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 15:02:53 +0200 Subject: [PATCH 51/61] added --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 461aee301..dae648f80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,10 +65,10 @@ distributed = [ "modal>=1.0.5,<2.0.0", ] scraping = [ - "tavily-python>=0.7.0", + "tavily-python>=0.7.12", "beautifulsoup4>=4.13.1", "playwright>=1.9.0", - "lxml>=4.9.3,<5.0.0", + "lxml>=4.9.3", "protego>=0.1", "APScheduler>=3.10.0,<=3.11.0" ] From 86ec2e9685aabbb0d54fc45a99c0ac131e3a89c4 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 15:06:38 +0200 Subject: [PATCH 52/61] added --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dae648f80..d4d8d535d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ chromadb = [ 
"chromadb>=0.6,<0.7", "pypika==0.48.9", ] -docs = ["lxml==6.0.2", "unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] +docs = ["lxml<6.0.0", "unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] codegraph = [ "fastembed<=0.6.0 ; python_version < '3.13'", "transformers>=4.46.3,<5", From cbfa360b8f7726c1eec9bfd97d8297f2024664e3 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 15:26:06 +0200 Subject: [PATCH 53/61] added lock file --- poetry.lock | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 62ae7be8d..c974a0b43 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "accelerate" @@ -6633,7 +6633,7 @@ description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.11" groups = ["main"] -markers = "python_version == \"3.12\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.12\"" files = [ {file = "numpy-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ffc4f5caba7dfcbe944ed674b7eef683c7e94874046454bb79ed7ee0236f59d"}, {file = "numpy-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7e946c7170858a0295f79a60214424caac2ffdb0063d4d79cb681f9aa0aa569"}, @@ -8532,7 +8532,6 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, - {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -11203,7 +11202,7 @@ description = "Easily download, build, install, upgrade, and uninstall Python pa optional = true python-versions = ">=3.9" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\") or python_version == \"3.12\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\" or extra == \"docling\") or python_full_version == \"3.13.0\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\" or extra == \"docling\") or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (extra == \"docs\" or extra == \"docling\" or extra == \"notebook\" or 
extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\") or python_version >= \"3.12\" and (extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\" or extra == \"docs\" or extra == \"docling\") or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\"" files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, @@ -13510,7 +13509,7 @@ dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plug distributed = ["modal"] dlt = ["dlt"] docling = ["docling", "transformers"] -docs = ["unstructured"] +docs = ["lxml", "unstructured"] evals = ["gdown", "matplotlib", "pandas", "plotly", "scikit-learn"] graphiti = ["graphiti-core"] groq = ["groq"] @@ -13531,5 +13530,5 @@ scraping = ["APScheduler", "beautifulsoup4", "lxml", "playwright", "protego", "t [metadata] lock-version = "2.1" -python-versions = ">=3.10,<=3.13" -content-hash = "8d8172ac8ddc3c30ca79a1677ecf2a28897d52c0a564d8fb5646c8565c313a0f" +python-versions = ">=3.10,<3.14" +content-hash = "bcab5420339473ec08b89cde588899b60999762fb8ca9a011240d47ea86198e3" From 04719129a64809e28ed9c5e0af40dcd77a2e32dc Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 19 Oct 2025 15:53:38 +0200 Subject: [PATCH 54/61] updated env template --- .env.template | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.env.template b/.env.template index 3137636d3..89ac06830 100644 --- a/.env.template +++ b/.env.template @@ -247,10 +247,10 @@ LITELLM_LOG="ERROR" #LLM_PROVIDER="ollama" #LLM_ENDPOINT="http://localhost:11434/v1" #EMBEDDING_PROVIDER="ollama" -#EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest" +#EMBEDDING_MODEL="nomic-embed-text:latest" #EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings" -#EMBEDDING_DIMENSIONS=4096 -#HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral" +#EMBEDDING_DIMENSIONS=768 +#HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5" ########## OpenRouter (also free) ######################################################### From 400095d76df23c33b7c4783654d381255459d0a4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:30:13 +0200 Subject: [PATCH 55/61] fix: Resolve issue with multi-user mode search --- cognee/api/v1/search/search.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 9f158e9d0..4051bae86 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -179,13 +179,6 @@ async def search( if not datasets: raise DatasetNotFoundError(message="No datasets found.") - graph_engine = await get_graph_engine() - is_empty = await graph_engine.is_empty() - - if is_empty: - logger.warning("Search attempt on an empty knowledge graph") - return [] - filtered_search_results = await search_function( query_text=query_text, query_type=query_type, From f88277c467e81f3d63b0e2f713be3d06c3c19276 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 23:10:53 +0200 Subject: [PATCH 56/61] fix: Resolve issue with plain text files not having magic file info --- cognee/infrastructure/files/utils/guess_file_type.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py 
b/cognee/infrastructure/files/utils/guess_file_type.py
index edd2d89b0..dcdd68cad 100644
--- a/cognee/infrastructure/files/utils/guess_file_type.py
+++ b/cognee/infrastructure/files/utils/guess_file_type.py
@@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
     """
     file_type = filetype.guess(file)
 
+    # If file type could not be determined consider it a plain text file as they don't have magic number encoding
+    if file_type is None:
+        from filetype.types.base import Type
+
+        file_type = Type("text/plain", "txt")
+
     if file_type is None:
         raise FileTypeException(f"Unknown file detected: {file.name}.")

From 8c627d9e10df49d8c2315592b664081fab45e486 Mon Sep 17 00:00:00 2001
From: Hande <159312713+hande-k@users.noreply.github.com>
Date: Mon, 20 Oct 2025 12:03:40 +0200
Subject: [PATCH 57/61] chore: update colab notebook on README

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a1eebae73..305bffdfe 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
 
 ## Get Started
 
-Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo
+Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo
 
 ## About cognee
 
@@ -224,12 +224,12 @@ We now have a paper you can cite:
 
 ```bibtex
 @misc{markovic2025optimizinginterfaceknowledgegraphs,
-      title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
+      title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
       author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
       year={2025},
       eprint={2505.24478},
       archivePrefix={arXiv},
       primaryClass={cs.AI},
-      url={https://arxiv.org/abs/2505.24478},
+      url={https://arxiv.org/abs/2505.24478},
 }
 ```

From 279d6e80f03420838ae9e4ca81648563290d4d36 Mon Sep 17 00:00:00 2001
From: Daulet Amirkhanov
Date: Mon, 20 Oct 2025 11:56:15 +0100
Subject: [PATCH 58/61] Revert "fix: search without prior cognify"

---
 cognee/api/v1/search/search.py | 4 ----
 .../databases/graph/graph_db_interface.py | 5 ----
 .../databases/graph/kuzu/adapter.py | 9 --------
 .../databases/graph/neo4j_driver/adapter.py | 9 --------
 cognee/tests/test_kuzu.py | 23 ++++---------------
 cognee/tests/test_neo4j.py | 22 ++++--------------
 cognee/tests/unit/api/test_search.py | 21 -----------------
 7 files changed, 9 insertions(+), 84 deletions(-)
 delete mode 100644 cognee/tests/unit/api/test_search.py

diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py
index 4051bae86..0a9e76e96 100644
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@@ -1,7 +1,6 @@
 from uuid import UUID
 from typing import Union, Optional, List, Type
 
-from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.users.models import User
 from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
@@ -9,9 +8,6 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.exceptions import DatasetNotFoundError
-from cognee.shared.logging_utils import get_logger
-
-logger = get_logger()
 
 
 async def search(

diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py
index 67df1a27c..65afdf275 100644
--- a/cognee/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognee/infrastructure/databases/graph/graph_db_interface.py
@@ -159,11 +159,6 @@ class GraphDBInterface(ABC):
     - get_connections
     """
 
-    @abstractmethod
-    async def is_empty(self) -> bool:
-        logger.warning("is_empty() is not implemented")
-        return True
-
     @abstractmethod
     async def query(self, query: str, params: dict) -> List[Any]:
         """

diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py
index 2d3866888..3f0fb0c57 100644
--- a/cognee/infrastructure/databases/graph/kuzu/adapter.py
+++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py
@@ -198,15 +198,6 @@ class KuzuAdapter(GraphDBInterface):
         except FileNotFoundError:
             logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
 
-    async def is_empty(self) -> bool:
-        query = """
-        MATCH (n)
-        RETURN true
-        LIMIT 1;
-        """
-        query_result = await self.query(query)
-        return len(query_result) == 0
-
     async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
         """
         Execute a Kuzu query asynchronously with automatic reconnection.

diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
index 5861b69cb..520295ed2 100644
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -87,15 +87,6 @@ class Neo4jAdapter(GraphDBInterface):
         async with self.driver.session(database=self.graph_database_name) as session:
             yield session
 
-    async def is_empty(self) -> bool:
-        query = """
-        RETURN EXISTS {
-            MATCH (n)
-        } AS node_exists;
-        """
-        query_result = await self.query(query)
-        return not query_result[0]["node_exists"]
-
     @deadlock_retry()
     async def query(
         self,

diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py
index fe9da6dcb..8749e42d0 100644
--- a/cognee/tests/test_kuzu.py
+++ b/cognee/tests/test_kuzu.py
@@ -47,26 +47,10 @@ async def main():
             pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
         )
 
-        from cognee.infrastructure.databases.graph import get_graph_engine
-
-        graph_engine = await get_graph_engine()
-
-        is_empty = await graph_engine.is_empty()
-
-        assert is_empty, "Kuzu graph database is not empty"
-
         await cognee.add([explanation_file_path_quantum], dataset_name)
 
-        is_empty = await graph_engine.is_empty()
-
-        assert is_empty, "Kuzu graph database should be empty before cognify"
-
         await cognee.cognify([dataset_name])
 
-        is_empty = await graph_engine.is_empty()
-
-        assert not is_empty, "Kuzu graph database should not be empty"
-
         from cognee.infrastructure.databases.vector import get_vector_engine
 
         vector_engine = get_vector_engine()
@@ -130,10 +114,11 @@ async def main():
         assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
 
         await cognee.prune.prune_system(metadata=True)
+        from cognee.infrastructure.databases.graph import get_graph_engine
 
-        is_empty = await graph_engine.is_empty()
-
-        assert is_empty, "Kuzu graph database is not empty"
+        graph_engine = await get_graph_engine()
+        nodes, edges = await graph_engine.get_graph_data()
+        assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
     finally:
         # Ensure cleanup even if tests fail

diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py
index 925614e67..c74b4ab65 100644
--- a/cognee/tests/test_neo4j.py
+++ b/cognee/tests/test_neo4j.py
@@ -35,14 +35,6 @@ async def main():
     explanation_file_path_nlp = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
     )
-    from cognee.infrastructure.databases.graph import get_graph_engine
-
-    graph_engine = await get_graph_engine()
-
-    is_empty = await graph_engine.is_empty()
-
-    assert is_empty, "Graph has to be empty"
-
     await cognee.add([explanation_file_path_nlp], dataset_name)
 
     explanation_file_path_quantum = os.path.join(
@@ -50,16 +42,9 @@ async def main():
     )
     await cognee.add([explanation_file_path_quantum], dataset_name)
 
-    is_empty = await graph_engine.is_empty()
-
-    assert is_empty, "Graph has to be empty before cognify"
    await cognee.cognify([dataset_name])
 
-    is_empty = await graph_engine.is_empty()
-
-    assert not is_empty, "Graph shouldn't be empty"
-
     from cognee.infrastructure.databases.vector import get_vector_engine
 
     vector_engine = get_vector_engine()
@@ -132,8 +117,11 @@ async def main():
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
 
     await cognee.prune.prune_system(metadata=True)
-    is_empty = await graph_engine.is_empty()
-    assert is_empty, "Neo4j graph database is not empty"
+    from cognee.infrastructure.databases.graph import get_graph_engine
+
+    graph_engine = await get_graph_engine()
+    nodes, edges = await graph_engine.get_graph_data()
+    assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
 
 
 if __name__ == "__main__":

diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py
deleted file mode 100644
index 54a4cc35f..000000000
--- a/cognee/tests/unit/api/test_search.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import pytest
-import cognee
-
-
-@pytest.mark.asyncio
-async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
-    await cognee.prune.prune_data()
-    await cognee.prune.prune_system(metadata=True)
-    await cognee.add("Sample input")
-    result = await cognee.search("Sample query")
-    assert result == []
-
-
-@pytest.mark.asyncio
-async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
-    await cognee.prune.prune_data()
-    await cognee.prune.prune_system(metadata=True)
-    await cognee.add("Sample input")
-    await cognee.cognify()
-    result = await cognee.search("Sample query")
-    assert result != []

From 3e54b67b4d7f20c385afad0bc878943df9a0b86c Mon Sep 17 00:00:00 2001
From: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Date: Mon, 20 Oct 2025 15:03:35 +0200
Subject: [PATCH 59/61] fix: Resolve missing argument for distributed (#1563)

## Description

Resolve missing argument for distributed

## Type of Change

- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)

## Pre-submission Checklist

- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com>
---
 cognee/modules/pipelines/operations/run_tasks_distributed.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cognee/modules/pipelines/operations/run_tasks_distributed.py b/cognee/modules/pipelines/operations/run_tasks_distributed.py
index 95cdb0266..3fce3763d 100644
--- a/cognee/modules/pipelines/operations/run_tasks_distributed.py
+++ b/cognee/modules/pipelines/operations/run_tasks_distributed.py
@@ -88,6 +88,7 @@ async def run_tasks_distributed(
     pipeline_name: str = "unknown_pipeline",
     context: dict = None,
     incremental_loading: bool = False,
+    data_per_batch: int = 20,
 ):
     if not user:
         user = await get_default_user()

From df038365c848775229e1c9255d56992352b1990e Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Mon, 20 Oct 2025 17:27:49 +0200
Subject: [PATCH 60/61] fix: fixes id in get_filtered_graph_data (#1569)

## Description

Fixes get_filtered_graph_data method in neo4jAdapter.

## Type of Change

- [x] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)

## Pre-submission Checklist

- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the issue/feature**
- [x] My code follows the project's coding standards and style guidelines
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] I have added necessary documentation (if applicable)
- [x] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been submitted already
- [x] I have linked any relevant issues in the description
- [x] My commits have clear and descriptive messages

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
---
 cognee/infrastructure/databases/graph/neo4j_driver/adapter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
index 520295ed2..365d02979 100644
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -1067,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface):
         query_nodes = f"""
         MATCH (n)
         WHERE {where_clause}
-        RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
+        RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
         """
         result_nodes = await self.query(query_nodes)
 
@@ -1082,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface):
         query_edges = f"""
         MATCH (n)-[r]->(m)
         WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
-        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
+        RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
         """
         result_edges = await self.query(query_edges)

From 612a2252ce012fc8929ffe6523ed6bc948a4db55 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Tue, 21 Oct 2025 07:22:52 +0200
Subject: [PATCH 61/61] fix

---
 poetry.lock | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 80263027e..2773e61b9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
 
 [[package]]
 name = "accelerate"
@@ -4366,6 +4366,8 @@ groups = ["main"]
 markers = "extra == \"dlt\""
 files = [
     {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
+    {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
+    {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
 ]
 
 [package.dependencies]
@@ -10208,6 +10210,13 @@ optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
+    {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"},
+    {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"},
+    {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"},
+    {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"},
+    {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"},
+    {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"},
+    {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"},
     {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"},
     {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"},
     {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"},