Merge branch 'dev' into feature/cog-3165-add-load-tests
Commit 4ff0b3407e
52 changed files with 3318 additions and 3070 deletions
@@ -28,11 +28,10 @@ EMBEDDING_ENDPOINT=""
EMBEDDING_API_VERSION=""
EMBEDDING_DIMENSIONS=3072
EMBEDDING_MAX_TOKENS=8191
EMBEDDING_BATCH_SIZE=36
# If embedding key is not provided same key set for LLM_API_KEY will be used
#EMBEDDING_API_KEY="your_api_key"
# Note: OpenAI support up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch,
# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models
#EMBEDDING_BATCH_SIZE=2048

# If using BAML structured output these env variables will be used
BAML_LLM_PROVIDER=openai
@@ -248,10 +247,10 @@ LITELLM_LOG="ERROR"
#LLM_PROVIDER="ollama"
#LLM_ENDPOINT="http://localhost:11434/v1"
#EMBEDDING_PROVIDER="ollama"
#EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest"
#EMBEDDING_MODEL="nomic-embed-text:latest"
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings"
#EMBEDDING_DIMENSIONS=4096
#HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral"
#EMBEDDING_DIMENSIONS=768
#HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"

########## OpenRouter (also free) #########################################################
2 .github/actions/cognee_setup/action.yml vendored
@@ -41,4 +41,4 @@ runs:
EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
done
fi
uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS
67 .github/workflows/e2e_tests.yml vendored
@@ -1,4 +1,6 @@
name: Reusable Integration Tests
permissions:
contents: read

on:
workflow_call:
@@ -264,3 +266,68 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_edge_ingestion.py

run_concurrent_subprocess_access_test:
name: Concurrent Subprocess access test
runs-on: ubuntu-latest
defaults:
run:
shell: bash
services:
postgres:
image: pgvector/pgvector:pg17
env:
POSTGRES_USER: cognee
POSTGRES_PASSWORD: cognee
POSTGRES_DB: cognee_db
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 5432:5432

redis:
image: redis:7
ports:
- 6379:6379
options: >-
--health-cmd "redis-cli ping"
--health-interval 5s
--health-timeout 3s
--health-retries 5

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "postgres redis"

- name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres)
env:
ENV: dev
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPH_DATABASE_PROVIDER: 'kuzu'
CACHING: true
SHARED_KUZU_LOCK: true
DB_PROVIDER: 'postgres'
DB_NAME: 'cognee_db'
DB_HOST: '127.0.0.1'
DB_PORT: 5432
DB_USERNAME: cognee
DB_PASSWORD: cognee
run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py
75 .github/workflows/examples_tests.yml vendored
@@ -110,6 +110,81 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/dynamic_steps_example.py

test-temporal-example:
name: Run Temporal Tests
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'

- name: Run Temporal Example
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/temporal_example.py

test-ontology-example:
name: Run Ontology Tests
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'

- name: Run Ontology Demo Example
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/ontology_demo_example.py

test-agentic-reasoning:
name: Run Agentic Reasoning Tests
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'

- name: Run Agentic Reasoning Example
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/agentic_reasoning_procurement_example.py

test-memify:
name: Run Memify Example
runs-on: ubuntu-22.04
@@ -9,7 +9,7 @@ on:
python-versions:
required: false
type: string
default: '["3.10.x", "3.11.x", "3.12.x"]'
default: '["3.10.x", "3.12.x", "3.13.x"]'
secrets:
LLM_PROVIDER:
required: true
@@ -193,6 +193,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Path setup
if: ${{ matrix.os }} == 'windows-latest'
shell: bash
run: |
PATH=$(printf '%s' "$PATH" | tr ':' $'\n' | grep -vi '/git/usr/bin' | paste -sd: -)
export PATH

- name: Run Soft Deletion Tests
env:
ENV: 'dev'
2 .github/workflows/test_suites.yml vendored
@@ -85,7 +85,7 @@ jobs:
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/test_different_operating_systems.yml
with:
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
python-versions: '["3.10.x", "3.11.x", "3.12.x", "3.13.x"]'
secrets: inherit

# Matrix-based vector database tests
@@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext

## Get Started

Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>

## About cognee
@@ -224,12 +224,12 @@ We now have a paper you can cite:

```bibtex
@misc{markovic2025optimizinginterfaceknowledgegraphs,
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
year={2025},
eprint={2505.24478},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2505.24478},
url={https://arxiv.org/abs/2505.24478},
}
```
@@ -41,6 +41,7 @@ async def add(
extraction_rules: Optional[Dict[str, Any]] = None,
tavily_config: Optional[BaseModel] = None,
soup_crawler_config: Optional[BaseModel] = None,
data_per_batch: Optional[int] = 20,
):
"""
Add data to Cognee for knowledge graph processing.
@@ -235,6 +236,7 @@ async def add(
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
incremental_loading=incremental_loading,
data_per_batch=data_per_batch,
):
pipeline_run_info = run_info
@@ -44,6 +44,7 @@ async def cognify(
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
chunks_per_batch: int = None,
config: Config = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
@@ -51,6 +52,7 @@ async def cognify(
incremental_loading: bool = True,
custom_prompt: Optional[str] = None,
temporal_cognify: bool = False,
data_per_batch: int = 20,
):
"""
Transform ingested data into a structured knowledge graph.
@@ -105,6 +107,7 @@ async def cognify(
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge.
chunks_per_batch: Number of chunks to be processed in a single batch in Cognify tasks.
vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -209,10 +212,18 @@ async def cognify(
}

if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
tasks = await get_temporal_tasks(
user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch
)
else:
tasks = await get_default_tasks(
user, graph_model, chunker, chunk_size, config, custom_prompt
user=user,
graph_model=graph_model,
chunker=chunker,
chunk_size=chunk_size,
config=config,
custom_prompt=custom_prompt,
chunks_per_batch=chunks_per_batch,
)

# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -228,6 +239,7 @@ async def cognify(
graph_db_config=graph_db_config,
incremental_loading=incremental_loading,
pipeline_name="cognify_pipeline",
data_per_batch=data_per_batch,
)

@@ -238,6 +250,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
chunk_size: int = None,
config: Config = None,
custom_prompt: Optional[str] = None,
chunks_per_batch: int = 100,
) -> list[Task]:
if config is None:
ontology_config = get_ontology_env_config()
@@ -256,6 +269,9 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}

if chunks_per_batch is None:
chunks_per_batch = 100

default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -269,20 +285,20 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model=graph_model,
config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
task_config={"batch_size": chunks_per_batch},
), # Generate knowledge graphs from the document chunks.
Task(
summarize_text,
task_config={"batch_size": 10},
task_config={"batch_size": chunks_per_batch},
),
Task(add_data_points, task_config={"batch_size": 10}),
Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
]

return default_tasks

async def get_temporal_tasks(
user: User = None, chunker=TextChunker, chunk_size: int = None
user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
) -> list[Task]:
"""
Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -299,10 +315,14 @@ async def get_temporal_tasks(
user (User, optional): The user requesting task execution, used for permission checks.
chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify

Returns:
list[Task]: A list of Task objects representing the temporal processing pipeline.
"""
if chunks_per_batch is None:
chunks_per_batch = 10

temporal_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -311,9 +331,9 @@ async def get_temporal_tasks(
max_chunk_size=chunk_size or get_max_chunk_tokens(),
chunker=chunker,
),
Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}),
Task(extract_knowledge_graph_from_events),
Task(add_data_points, task_config={"batch_size": 10}),
Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
]

return temporal_tasks
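The new chunks_per_batch and data_per_batch arguments are plain keyword parameters on the public API. A minimal usage sketch, assuming the usual cognee.add / cognee.cognify entry points; the dataset name and batch values below are illustrative and do not come from this commit:

```python
import asyncio
import cognee


async def main():
    # Illustrative dataset name and batch sizes; adjust to your workload.
    await cognee.add("Some text to ingest.", dataset_name="demo_dataset")
    await cognee.cognify(
        datasets=["demo_dataset"],
        chunks_per_batch=50,  # how many chunks each Cognify task processes per batch
        data_per_batch=10,    # how many data items run through the pipeline concurrently
    )


asyncio.run(main())
```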
2 cognee/infrastructure/databases/cache/__init__.py vendored Normal file
@@ -0,0 +1,2 @@
from .get_cache_engine import get_cache_engine
from .config import get_cache_config
42 cognee/infrastructure/databases/cache/cache_db_interface.py vendored Normal file
@@ -0,0 +1,42 @@
from abc import ABC, abstractmethod
from contextlib import contextmanager


class CacheDBInterface(ABC):
    """
    Abstract base class for distributed cache coordination systems (e.g., Redis, Memcached).
    Provides a common interface for lock acquisition, release, and context-managed locking.
    """

    def __init__(self, host: str, port: int, lock_key: str):
        self.host = host
        self.port = port
        self.lock_key = lock_key
        self.lock = None

    @abstractmethod
    def acquire_lock(self):
        """
        Acquire a lock on the given key.
        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def release_lock(self):
        """
        Release the lock if it is held.
        Must be implemented by subclasses.
        """
        pass

    @contextmanager
    def hold_lock(self):
        """
        Context manager for safely acquiring and releasing the lock.
        """
        self.acquire()
        try:
            yield
        finally:
            self.release()
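To make the contract concrete, here is a hypothetical adapter that satisfies the two abstract methods with a plain threading.Lock; it is illustration only, not part of the commit, and assumes this branch of cognee is importable:

```python
import threading

from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface


class InProcessLockAdapter(CacheDBInterface):
    """Hypothetical adapter backed by threading.Lock, for illustration only."""

    def __init__(self, lock_key: str):
        super().__init__(host="localhost", port=0, lock_key=lock_key)
        self._lock = threading.Lock()

    def acquire_lock(self):
        if not self._lock.acquire(timeout=5):
            raise RuntimeError(f"Could not acquire lock: {self.lock_key}")
        self.lock = self._lock
        return self.lock

    def release_lock(self):
        if self.lock:
            self.lock.release()
            self.lock = None


# Callers would typically pair acquire_lock()/release_lock() in a try/finally block.
```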
39 cognee/infrastructure/databases/cache/config.py vendored Normal file
@@ -0,0 +1,39 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
from functools import lru_cache


class CacheConfig(BaseSettings):
    """
    Configuration for distributed cache systems (e.g., Redis), used for locking or coordination.

    Attributes:
    - shared_kuzu_lock: Shared kuzu lock logic on/off.
    - cache_host: Hostname of the cache service.
    - cache_port: Port number for the cache service.
    - agentic_lock_expire: Automatic lock expiration time (in seconds).
    - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
    """

    caching: bool = False
    shared_kuzu_lock: bool = False
    cache_host: str = "localhost"
    cache_port: int = 6379
    agentic_lock_expire: int = 240
    agentic_lock_timeout: int = 300

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

    def to_dict(self) -> dict:
        return {
            "caching": self.caching,
            "shared_kuzu_lock": self.shared_kuzu_lock,
            "cache_host": self.cache_host,
            "cache_port": self.cache_port,
            "agentic_lock_expire": self.agentic_lock_expire,
            "agentic_lock_timeout": self.agentic_lock_timeout,
        }


@lru_cache
def get_cache_config():
    return CacheConfig()
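Since CacheConfig is a pydantic BaseSettings model, each field above can be supplied through an environment variable or .env entry of the same name (the new workflow job sets CACHING and SHARED_KUZU_LOCK this way). A small sketch of reading the resolved values, assuming this branch of cognee is installed:

```python
from cognee.infrastructure.databases.cache import get_cache_config

# get_cache_config() is lru_cached, so repeated calls return the same CacheConfig instance.
config = get_cache_config()

# Keys: caching, shared_kuzu_lock, cache_host, cache_port, agentic_lock_expire, agentic_lock_timeout.
print(config.to_dict())
```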
59 cognee/infrastructure/databases/cache/get_cache_engine.py vendored Normal file
@@ -0,0 +1,59 @@
"""Factory to get the appropriate cache coordination engine (e.g., Redis)."""

from functools import lru_cache
from cognee.infrastructure.databases.cache.config import get_cache_config

from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface

config = get_cache_config()


@lru_cache
def create_cache_engine(
    cache_host: str,
    cache_port: int,
    lock_key: str,
    agentic_lock_expire: int = 240,
    agentic_lock_timeout: int = 300,
):
    """
    Factory function to instantiate a cache coordination backend (currently Redis).

    Parameters:
    -----------
    - cache_host: Hostname or IP of the cache server.
    - cache_port: Port number to connect to.
    - lock_key: Identifier used for the locking resource.
    - agentic_lock_expire: Duration to hold the lock after acquisition.
    - agentic_lock_timeout: Max time to wait for the lock before failing.

    Returns:
    --------
    - CacheDBInterface: An instance of the appropriate cache adapter. :TODO: Now we support only Redis. later if we add more here we can split the logic
    """
    if config.caching:
        from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

        return RedisAdapter(
            host=cache_host,
            port=cache_port,
            lock_name=lock_key,
            timeout=agentic_lock_expire,
            blocking_timeout=agentic_lock_timeout,
        )
    else:
        return None


def get_cache_engine(lock_key: str) -> CacheDBInterface:
    """
    Returns a cache adapter instance using current context configuration.
    """

    return create_cache_engine(
        cache_host=config.cache_host,
        cache_port=config.cache_port,
        lock_key=lock_key,
        agentic_lock_expire=config.agentic_lock_expire,
        agentic_lock_timeout=config.agentic_lock_timeout,
    )
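A hedged usage sketch of the factory: with caching enabled it returns a RedisAdapter, otherwise None, so callers should guard for the disabled case. The lock key below is illustrative:

```python
from cognee.infrastructure.databases.cache import get_cache_engine

lock = get_cache_engine(lock_key="example-resource-lock")  # illustrative key

if lock is not None:  # None when caching is disabled in CacheConfig
    lock.acquire_lock()
    try:
        pass  # critical section guarded by the distributed lock
    finally:
        lock.release_lock()
```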
49 cognee/infrastructure/databases/cache/redis/RedisAdapter.py vendored Normal file
@@ -0,0 +1,49 @@
import redis
from contextlib import contextmanager
from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface


class RedisAdapter(CacheDBInterface):
    def __init__(self, host, port, lock_name, timeout=240, blocking_timeout=300):
        super().__init__(host, port, lock_name)
        self.redis = redis.Redis(host=host, port=port)
        self.timeout = timeout
        self.blocking_timeout = blocking_timeout

    def acquire_lock(self):
        """
        Acquire the Redis lock manually. Raises if acquisition fails.
        """
        self.lock = self.redis.lock(
            name=self.lock_key,
            timeout=self.timeout,
            blocking_timeout=self.blocking_timeout,
        )

        acquired = self.lock.acquire()
        if not acquired:
            raise RuntimeError(f"Could not acquire Redis lock: {self.lock_key}")

        return self.lock

    def release_lock(self):
        """
        Release the Redis lock manually, if held.
        """
        if self.lock:
            try:
                self.lock.release()
                self.lock = None
            except redis.exceptions.LockError:
                pass

    @contextmanager
    def hold_lock(self):
        """
        Context manager for acquiring and releasing the Redis lock automatically.
        """
        self.acquire()
        try:
            yield
        finally:
            self.release()
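Constructed directly rather than through the factory, the adapter takes the same parameters that create_cache_engine forwards. A minimal sketch, assuming a Redis server reachable on localhost:6379; the lock name is illustrative:

```python
from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

# Values mirror the factory defaults; requires a running Redis instance.
adapter = RedisAdapter(host="localhost", port=6379, lock_name="demo-lock", timeout=240, blocking_timeout=300)

adapter.acquire_lock()  # raises RuntimeError if the lock cannot be acquired in time
try:
    pass  # work that must not run concurrently in another process
finally:
    adapter.release_lock()
```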
@@ -162,5 +162,5 @@ def create_graph_engine(

raise EnvironmentError(
f"Unsupported graph database provider: {graph_database_provider}. "
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'neptune', 'neptune_analytics'])}"
)
@@ -4,7 +4,7 @@ import os
import json
import asyncio
import tempfile
from uuid import UUID
from uuid import UUID, uuid5, NAMESPACE_OID
from kuzu import Connection
from kuzu.database import Database
from datetime import datetime, timezone
@@ -23,9 +23,14 @@ from cognee.infrastructure.engine import DataPoint
from cognee.modules.storage.utils import JSONEncoder
from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
from cognee.tasks.temporal_graph.models import Timestamp
from cognee.infrastructure.databases.cache.config import get_cache_config

logger = get_logger()

cache_config = get_cache_config()
if cache_config.shared_kuzu_lock:
from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine


class KuzuAdapter(GraphDBInterface):
"""
@@ -39,12 +44,20 @@ class KuzuAdapter(GraphDBInterface):

def __init__(self, db_path: str):
"""Initialize Kuzu database connection and schema."""
self.open_connections = 0
self._is_closed = False
self.db_path = db_path # Path for the database directory
self.db: Optional[Database] = None
self.connection: Optional[Connection] = None
self.executor = ThreadPoolExecutor()
self._initialize_connection()
if cache_config.shared_kuzu_lock:
self.redis_lock = get_cache_engine(
lock_key="kuzu-lock-" + str(uuid5(NAMESPACE_OID, db_path))
)
else:
self.executor = ThreadPoolExecutor()
self._initialize_connection()
self.KUZU_ASYNC_LOCK = asyncio.Lock()
self._connection_change_lock = asyncio.Lock()

def _initialize_connection(self) -> None:
"""Initialize the Kuzu database connection and schema."""
@@ -209,9 +222,13 @@ class KuzuAdapter(GraphDBInterface):
params = params or {}

def blocking_query():
lock_acquired = False
try:
if cache_config.shared_kuzu_lock:
self.redis_lock.acquire_lock()
lock_acquired = True
if not self.connection:
logger.debug("Reconnecting to Kuzu database...")
logger.info("Reconnecting to Kuzu database...")
self._initialize_connection()

result = self.connection.execute(query, params)
@@ -225,12 +242,47 @@ class KuzuAdapter(GraphDBInterface):
val = val.as_py()
processed_rows.append(val)
rows.append(tuple(processed_rows))

return rows
except Exception as e:
logger.error(f"Query execution failed: {str(e)}")
raise
finally:
if cache_config.shared_kuzu_lock and lock_acquired:
try:
self.close()
finally:
self.redis_lock.release_lock()

return await loop.run_in_executor(self.executor, blocking_query)
if cache_config.shared_kuzu_lock:
async with self._connection_change_lock:
self.open_connections += 1
logger.info(f"Open connections after open: {self.open_connections}")
try:
result = blocking_query()
finally:
self.open_connections -= 1
logger.info(f"Open connections after close: {self.open_connections}")
return result
else:
result = await loop.run_in_executor(self.executor, blocking_query)
return result

def close(self):
if self.connection:
del self.connection
self.connection = None
if self.db:
del self.db
self.db = None
self._is_closed = True
logger.info("Kuzu database closed successfully")

def reopen(self):
if self._is_closed:
self._is_closed = False
self._initialize_connection()
logger.info("Kuzu database re-opened successfully")

@asynccontextmanager
async def get_session(self):
@@ -1557,44 +1609,6 @@ class KuzuAdapter(GraphDBInterface):
logger.error(f"Failed to delete graph data: {e}")
raise

async def clear_database(self) -> None:
"""
Clear all data from the database by deleting the database files and reinitializing.

This method removes all files associated with the database and reinitializes the Kuzu
database structure, ensuring a completely empty state. It handles exceptions that might
occur during file deletions or initializations carefully.
"""
try:
if self.connection:
self.connection = None
if self.db:
self.db.close()
self.db = None

db_dir = os.path.dirname(self.db_path)
db_name = os.path.basename(self.db_path)
file_storage = get_file_storage(db_dir)

if await file_storage.file_exists(db_name):
await file_storage.remove_all()
logger.info(f"Deleted Kuzu database files at {self.db_path}")

# Reinitialize the database
self._initialize_connection()
# Verify the database is empty
result = self.connection.execute("MATCH (n:Node) RETURN COUNT(n)")
count = result.get_next()[0] if result.has_next() else 0
if count > 0:
logger.warning(
f"Database still contains {count} nodes after clearing, forcing deletion"
)
self.connection.execute("MATCH (n:Node) DETACH DELETE n")
logger.info("Database cleared successfully")
except Exception as e:
logger.error(f"Error during database clearing: {e}")
raise

async def get_document_subgraph(self, data_id: str):
"""
Get all nodes that should be deleted when removing a document.
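The new concurrent-subprocess workflow job exercises this shared-lock path. A hedged sketch of the pattern, mirroring the reader.py test added later in this diff; setting the flags via environment variables before the adapter module is imported is an assumption about how the lru_cached config is picked up:

```python
import asyncio
import os

# Illustrative: enable the shared Kuzu lock before the adapter reads its cached config.
os.environ["CACHING"] = "true"
os.environ["SHARED_KUZU_LOCK"] = "true"

from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter


async def main():
    adapter = KuzuAdapter("test.db")  # the same path can be opened by a second process
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Found {result[0][0]} nodes")


asyncio.run(main())
```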
File diff suppressed because it is too large
@@ -1067,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface):
query_nodes = f"""
MATCH (n)
WHERE {where_clause}
RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
"""
result_nodes = await self.query(query_nodes)

@@ -1082,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface):
query_edges = f"""
MATCH (n)-[r]->(m)
WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
"""
result_edges = await self.query(query_edges)
@@ -1,8 +1,17 @@
from cognee.shared.logging_utils import get_logger
import os
import logging
from typing import List, Optional
from fastembed import TextEmbedding
import litellm
import os
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.TikToken import (
@@ -57,6 +66,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
enable_mocking = str(enable_mocking).lower()
self.mock = enable_mocking in ("true", "1", "yes")

@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def embed_text(self, text: List[str]) -> List[List[float]]:
"""
Embed the given text into numerical vectors.
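The same tenacity decorator stack is repeated across the embedding engines and LLM adapters in this diff. In isolation the pattern looks like the sketch below; flaky_call and the ValueError exclusion are illustrative stand-ins for the real embedding call and litellm.exceptions.NotFoundError:

```python
import logging

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logger = logging.getLogger(__name__)


@retry(
    stop=stop_after_delay(128),                     # give up roughly 128 seconds after the first attempt
    wait=wait_exponential_jitter(2, 128),           # exponential backoff with jitter, capped at 128 seconds
    retry=retry_if_not_exception_type(ValueError),  # this exception type is never retried
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,                                   # re-raise the last exception once retries are exhausted
)
def flaky_call():
    # Illustrative body; the adapters wrap embedding and completion calls instead.
    raise ConnectionError("transient failure")
```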
@@ -1,15 +1,21 @@
import asyncio
import logging

from cognee.shared.logging_utils import get_logger
from typing import List, Optional
import numpy as np
import math
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)
import litellm
import os
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.Gemini import (
GeminiTokenizer,
)
from cognee.infrastructure.llm.tokenizer.HuggingFace import (
HuggingFaceTokenizer,
)
@@ -19,10 +25,6 @@ from cognee.infrastructure.llm.tokenizer.Mistral import (
from cognee.infrastructure.llm.tokenizer.TikToken import (
TikTokenTokenizer,
)
from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import (
embedding_rate_limit_async,
embedding_sleep_and_retry_async,
)

litellm.set_verbose = False
logger = get_logger("LiteLLMEmbeddingEngine")
@@ -76,8 +78,13 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
enable_mocking = str(enable_mocking).lower()
self.mock = enable_mocking in ("true", "1", "yes")

@embedding_sleep_and_retry_async()
@embedding_rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def embed_text(self, text: List[str]) -> List[List[float]]:
"""
Embed a list of text strings into vector representations.
@@ -3,8 +3,16 @@ from cognee.shared.logging_utils import get_logger
import aiohttp
from typing import List, Optional
import os

import litellm
import logging
import aiohttp.http_exceptions
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.llm.tokenizer.HuggingFace import (
@@ -69,7 +77,6 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
enable_mocking = str(enable_mocking).lower()
self.mock = enable_mocking in ("true", "1", "yes")

@embedding_rate_limit_async
async def embed_text(self, text: List[str]) -> List[List[float]]:
"""
Generate embedding vectors for a list of text prompts.
@@ -92,7 +99,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
embeddings = await asyncio.gather(*[self._get_embedding(prompt) for prompt in text])
return embeddings

@embedding_sleep_and_retry_async()
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def _get_embedding(self, prompt: str) -> List[float]:
"""
Internal method to call the Ollama embeddings endpoint for a single prompt.
@@ -24,11 +24,10 @@ class EmbeddingConfig(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="allow")

def model_post_init(self, __context) -> None:
# If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models
if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
self.embedding_batch_size = 2048
self.embedding_batch_size = 36
elif not self.embedding_batch_size:
self.embedding_batch_size = 100
self.embedding_batch_size = 36

def to_dict(self) -> dict:
"""
@@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
"""
file_type = filetype.guess(file)

# If file type could not be determined consider it a plain text file as they don't have magic number encoding
if file_type is None:
from filetype.types.base import Type

file_type = Type("text/plain", "txt")

if file_type is None:
raise FileTypeException(f"Unknown file detected: {file.name}.")
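The fallback works because plain-text files carry no magic-number signature, so filetype.guess returns None for them. A standalone sketch of the same idea, assuming the filetype package; sketch_guess is an illustrative helper, not cognee's function:

```python
import filetype
from filetype.types.base import Type


def sketch_guess(payload: bytes):
    kind = filetype.guess(payload)
    if kind is None:
        # No magic number detected: treat the payload as plain text, as the loader now does.
        kind = Type("text/plain", "txt")
    return kind


print(sketch_guess(b"just some text").mime)  # text/plain
```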
@@ -1,19 +1,24 @@
import logging
from typing import Type
from pydantic import BaseModel
import litellm
import instructor
from cognee.shared.logging_utils import get_logger
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,
)

from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.config import get_llm_config

logger = get_logger()


class AnthropicAdapter(LLMInterface):
"""
@@ -35,8 +40,13 @@ class AnthropicAdapter(LLMInterface):
self.model = model
self.max_completion_tokens = max_completion_tokens

@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -12,11 +12,18 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,
import logging
from cognee.shared.logging_utils import get_logger
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

logger = get_logger()


class GeminiAdapter(LLMInterface):
"""
@@ -58,8 +65,13 @@ class GeminiAdapter(LLMInterface):

self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)

@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -12,11 +12,18 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,
import logging
from cognee.shared.logging_utils import get_logger
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

logger = get_logger()


class GenericAPIAdapter(LLMInterface):
"""
@@ -58,8 +65,13 @@ class GenericAPIAdapter(LLMInterface):

self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)

@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -1,20 +1,23 @@
import litellm
import instructor
from pydantic import BaseModel
from typing import Type, Optional
from litellm import acompletion, JSONSchemaValidationError
from typing import Type
from litellm import JSONSchemaValidationError

from cognee.shared.logging_utils import get_logger
from cognee.modules.observability.get_observe import get_observe
from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,

import logging
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

logger = get_logger()
@@ -47,8 +50,13 @@ class MistralAdapter(LLMInterface):
api_key=get_llm_config().llm_api_key,
)

@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -99,31 +107,3 @@ class MistralAdapter(LLMInterface):
logger.error(f"Schema validation failed: {str(e)}")
logger.debug(f"Raw response: {e.raw_response}")
raise ValueError(f"Response failed schema validation: {str(e)}")

def show_prompt(self, text_input: str, system_prompt: str) -> str:
"""
Format and display the prompt for a user query.

Parameters:
-----------
- text_input (str): Input text from the user to be included in the prompt.
- system_prompt (str): The system prompt that will be shown alongside the user input.

Returns:
--------
- str: The formatted prompt string combining system prompt and user input.
"""
if not text_input:
text_input = "No user input provided."
if not system_prompt:
raise MissingSystemPromptPathError()

system_prompt = LLMGateway.read_query_prompt(system_prompt)

formatted_prompt = (
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
if system_prompt
else None
)

return formatted_prompt
@@ -1,4 +1,6 @@
import base64
import litellm
import logging
import instructor
from typing import Type
from openai import OpenAI
@@ -7,11 +9,17 @@ from pydantic import BaseModel
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,
)
from cognee.infrastructure.files.utils.open_data_file import open_data_file
from cognee.shared.logging_utils import get_logger
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

logger = get_logger()


class OllamaAPIAdapter(LLMInterface):
@@ -47,8 +55,13 @@ class OllamaAPIAdapter(LLMInterface):
OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
)

@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -90,7 +103,13 @@ class OllamaAPIAdapter(LLMInterface):

return response

@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def create_transcript(self, input_file: str) -> str:
"""
Generate an audio transcript from a user query.
@@ -123,7 +142,13 @@ class OllamaAPIAdapter(LLMInterface):

return transcription.text

@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def transcribe_image(self, input_file: str) -> str:
"""
Transcribe content from an image using base64 encoding.
@@ -7,6 +7,15 @@ from openai import ContentFilterFinishReasonError
from litellm.exceptions import ContentPolicyViolationError
from instructor.core import InstructorRetryException

import logging
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)

from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
@@ -14,19 +23,13 @@ from cognee.infrastructure.llm.exceptions import (
ContentPolicyFilterError,
)
from cognee.infrastructure.files.utils.open_data_file import open_data_file
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
rate_limit_sync,
sleep_and_retry_async,
sleep_and_retry_sync,
)
from cognee.modules.observability.get_observe import get_observe
from cognee.shared.logging_utils import get_logger

observe = get_observe()

logger = get_logger()

observe = get_observe()


class OpenAIAdapter(LLMInterface):
"""
@@ -97,8 +100,13 @@ class OpenAIAdapter(LLMInterface):
self.fallback_endpoint = fallback_endpoint

@observe(as_type="generation")
@sleep_and_retry_async()
@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -148,10 +156,7 @@ class OpenAIAdapter(LLMInterface):
InstructorRetryException,
) as e:
if not (self.fallback_model and self.fallback_api_key):
raise ContentPolicyFilterError(
f"The provided input contains content that is not aligned with our content policy: {text_input}"
) from e

raise e
try:
return await self.aclient.chat.completions.create(
model=self.fallback_model,
@@ -186,8 +191,13 @@ class OpenAIAdapter(LLMInterface):
) from error

@observe
@sleep_and_retry_sync()
@rate_limit_sync
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
def create_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
@@ -231,7 +241,13 @@ class OpenAIAdapter(LLMInterface):
max_retries=self.MAX_RETRIES,
)

@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def create_transcript(self, input):
"""
Generate an audio transcript from a user query.
@@ -263,7 +279,13 @@ class OpenAIAdapter(LLMInterface):

return transcription

@rate_limit_async
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def transcribe_image(self, input) -> BaseModel:
"""
Generate a transcription of an image from a user query.
@@ -105,7 +105,6 @@ class LoaderEngine:
async def load_file(
self,
file_path: str,
file_stream: Optional[Any],
preferred_loaders: Optional[List[str]] = None,
**kwargs,
):
@@ -14,14 +14,6 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader

logger = get_logger(__name__)

try:
from unstructured.partition.pdf import partition_pdf
except ImportError as e:
logger.info(
"unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead."
)
raise ImportError from e


@dataclass
class _PageBuffer:
@@ -88,6 +80,8 @@ class AdvancedPdfLoader(LoaderInterface):
**kwargs,
}
# Use partition to extract elements
from unstructured.partition.pdf import partition_pdf

elements = partition_pdf(**partition_kwargs)

# Process elements into text content
@@ -35,6 +35,7 @@ async def run_pipeline(
vector_db_config: dict = None,
graph_db_config: dict = None,
incremental_loading: bool = False,
data_per_batch: int = 20,
):
validate_pipeline_tasks(tasks)
await setup_and_check_environment(vector_db_config, graph_db_config)
@@ -50,6 +51,7 @@ async def run_pipeline(
pipeline_name=pipeline_name,
context={"dataset": dataset},
incremental_loading=incremental_loading,
data_per_batch=data_per_batch,
):
yield run_info

@@ -62,6 +64,7 @@ async def run_pipeline_per_dataset(
pipeline_name: str = "custom_pipeline",
context: dict = None,
incremental_loading=False,
data_per_batch: int = 20,
):
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
await set_database_global_context_variables(dataset.id, dataset.owner_id)
@@ -77,7 +80,7 @@ async def run_pipeline_per_dataset(
return

pipeline_run = run_tasks(
tasks, dataset.id, data, user, pipeline_name, context, incremental_loading
tasks, dataset.id, data, user, pipeline_name, context, incremental_loading, data_per_batch
)

async for pipeline_run_info in pipeline_run:
@@ -24,7 +24,6 @@ from cognee.modules.pipelines.operations import (
log_pipeline_run_complete,
log_pipeline_run_error,
)
from .run_tasks_with_telemetry import run_tasks_with_telemetry
from .run_tasks_data_item import run_tasks_data_item
from ..tasks.task import Task

@@ -60,6 +59,7 @@ async def run_tasks(
pipeline_name: str = "unknown_pipeline",
context: dict = None,
incremental_loading: bool = False,
data_per_batch: int = 20,
):
if not user:
user = await get_default_user()
@@ -89,24 +89,29 @@ async def run_tasks(
if incremental_loading:
data = await resolve_data_directories(data)

# Create async tasks per data item that will run the pipeline for the data item
data_item_tasks = [
asyncio.create_task(
run_tasks_data_item(
data_item,
dataset,
tasks,
pipeline_name,
pipeline_id,
pipeline_run_id,
context,
user,
incremental_loading,
# Create and gather batches of async tasks of data items that will run the pipeline for the data item
results = []
for start in range(0, len(data), data_per_batch):
data_batch = data[start : start + data_per_batch]

data_item_tasks = [
asyncio.create_task(
run_tasks_data_item(
data_item,
dataset,
tasks,
pipeline_name,
pipeline_id,
pipeline_run_id,
context,
user,
incremental_loading,
)
)
)
for data_item in data
]
results = await asyncio.gather(*data_item_tasks)
for data_item in data_batch
]

results.extend(await asyncio.gather(*data_item_tasks))

# Remove skipped data items from results
results = [result for result in results if result]
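Stripped of the pipeline specifics, the rewritten loop is a standard pattern: run at most data_per_batch coroutines at a time and gather each batch before starting the next. A minimal sketch with an illustrative handler:

```python
import asyncio


async def handle(item):
    # Stand-in for run_tasks_data_item(...); simulates some async work.
    await asyncio.sleep(0.01)
    return item


async def run_in_batches(items, per_batch=20):
    results = []
    for start in range(0, len(items), per_batch):
        batch = items[start : start + per_batch]
        tasks = [asyncio.create_task(handle(item)) for item in batch]
        # Each batch is awaited before the next starts, bounding concurrency to per_batch.
        results.extend(await asyncio.gather(*tasks))
    return [result for result in results if result]  # drop skipped (falsy) results, as run_tasks does


print(asyncio.run(run_in_batches(list(range(45)), per_batch=20)))
```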
@@ -115,9 +115,8 @@ async def run_tasks_data_item_incremental(
data_point = (
await session.execute(select(Data).filter(Data.id == data_id))
).scalar_one_or_none()
data_point.pipeline_status[pipeline_name] = {
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
}
status_for_pipeline = data_point.pipeline_status.setdefault(pipeline_name, {})
status_for_pipeline[str(dataset.id)] = DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
await session.merge(data_point)
await session.commit()
@@ -88,6 +88,7 @@ async def run_tasks_distributed(
pipeline_name: str = "unknown_pipeline",
context: dict = None,
incremental_loading: bool = False,
data_per_batch: int = 20,
):
if not user:
user = await get_default_user()
@@ -1,6 +1,6 @@
from cognee.shared.logging_utils import get_logger
import asyncio

from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.engine import DataPoint

@@ -33,18 +33,23 @@ async def index_data_points(data_points: list[DataPoint]):
indexed_data_point.metadata["index_fields"] = [field_name]
index_points[index_name].append(indexed_data_point)

for index_name_and_field, indexable_points in index_points.items():
first_occurence = index_name_and_field.index("_")
index_name = index_name_and_field[:first_occurence]
field_name = index_name_and_field[first_occurence + 1 :]
try:
# In case the amount of indexable points is too large we need to send them in batches
batch_size = vector_engine.embedding_engine.get_batch_size()
for i in range(0, len(indexable_points), batch_size):
batch = indexable_points[i : i + batch_size]
await vector_engine.index_data_points(index_name, field_name, batch)
except EmbeddingException as e:
logger.warning(f"Failed to index data points for {index_name}.{field_name}: {e}")
tasks: list[asyncio.Task] = []
batch_size = vector_engine.embedding_engine.get_batch_size()

for index_name_and_field, points in index_points.items():
first = index_name_and_field.index("_")
index_name = index_name_and_field[:first]
field_name = index_name_and_field[first + 1 :]

# Create embedding requests per batch to run in parallel later
for i in range(0, len(points), batch_size):
batch = points[i : i + batch_size]
tasks.append(
asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch))
)

# Run all embedding requests in parallel
await asyncio.gather(*tasks)

return data_points
|
|
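
Unlike the batched gathers in run_tasks, the rewritten index_data_points creates a task for every batch up front and awaits a single gather, so all embedding requests run concurrently. A standalone sketch of that fan-out, with a hypothetical index_batch coroutine standing in for vector_engine.index_data_points:

    import asyncio

    async def index_batch(index_name, field_name, batch):
        # Stand-in for vector_engine.index_data_points(index_name, field_name, batch).
        await asyncio.sleep(0.01)
        return len(batch)

    async def index_all(index_points, batch_size):
        tasks = []
        for index_name_and_field, points in index_points.items():
            index_name, _, field_name = index_name_and_field.partition("_")
            for i in range(0, len(points), batch_size):
                batch = points[i : i + batch_size]
                tasks.append(asyncio.create_task(index_batch(index_name, field_name, batch)))
        # Every batch of every index is awaited together.
        return await asyncio.gather(*tasks)

    # asyncio.run(index_all({"Entity_name": list(range(10))}, batch_size=4))
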
@ -1,3 +1,5 @@
import asyncio

from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
from cognee.shared.logging_utils import get_logger
from collections import Counter
@ -76,15 +78,20 @@ async def index_graph_edges(
indexed_data_point.metadata["index_fields"] = [field_name]
index_points[index_name].append(indexed_data_point)

# Get maximum batch size for embedding model
batch_size = vector_engine.embedding_engine.get_batch_size()
tasks: list[asyncio.Task] = []

for index_name, indexable_points in index_points.items():
index_name, field_name = index_name.split(".")

# Get maximum batch size for embedding model
batch_size = vector_engine.embedding_engine.get_batch_size()
# We save the data in batches of {batch_size} to not put a lot of pressure on the database
# Create embedding tasks to run in parallel later
for start in range(0, len(indexable_points), batch_size):
batch = indexable_points[start : start + batch_size]

await vector_engine.index_data_points(index_name, field_name, batch)
tasks.append(vector_engine.index_data_points(index_name, field_name, batch))

# Start all embedding tasks and wait for completion
await asyncio.gather(*tasks)

return None
25
cognee/tests/subprocesses/reader.py
Normal file

@ -0,0 +1,25 @@
import asyncio
import time
from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter

# This will create the test.db if it doesn't exist


async def main():
    adapter = KuzuAdapter("test.db")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result[0][0]} nodes")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result[0][0]} nodes")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result[0][0]} nodes")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result[0][0]} nodes")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result} nodes")
    result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
    print(f"Reader: Found {result[0][0]} nodes")


if __name__ == "__main__":
    asyncio.run(main())
31
cognee/tests/subprocesses/simple_cognify_1.py
Normal file

@ -0,0 +1,31 @@
import asyncio
import cognee
from cognee.shared.logging_utils import setup_logging, INFO
from cognee.api.v1.search import SearchType


async def main():
    await cognee.cognify(datasets=["first_cognify_dataset"])

    query_text = (
        "Tell me what is in the context. Additionally write out 'FIRST_COGNIFY' before your answer"
    )
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text=query_text,
        datasets=["first_cognify_dataset"],
    )

    print("Search results:")
    for result_text in search_results:
        print(result_text)


if __name__ == "__main__":
    logger = setup_logging(log_level=INFO)
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
31
cognee/tests/subprocesses/simple_cognify_2.py
Normal file

@ -0,0 +1,31 @@
import asyncio
import cognee
from cognee.shared.logging_utils import setup_logging, INFO
from cognee.api.v1.search import SearchType


async def main():
    await cognee.cognify(datasets=["second_cognify_dataset"])

    query_text = (
        "Tell me what is in the context. Additionally write out 'SECOND_COGNIFY' before your answer"
    )
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text=query_text,
        datasets=["second_cognify_dataset"],
    )

    print("Search results:")
    for result_text in search_results:
        print(result_text)


if __name__ == "__main__":
    logger = setup_logging(log_level=INFO)
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
32
cognee/tests/subprocesses/writer.py
Normal file

@ -0,0 +1,32 @@
import asyncio
import time
import uuid
from cognee.modules.data.processing.document_types import PdfDocument
from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter


def create_node(name):
    document = PdfDocument(
        id=uuid.uuid4(),
        name=name,
        raw_data_location=name,
        external_metadata="test_external_metadata",
        mime_type="test_mime",
    )
    return document


async def main():
    adapter = KuzuAdapter("test.db")
    nodes = [create_node(f"Node{i}") for i in range(5)]

    print("Writer: Starting...")
    await adapter.add_nodes(nodes)

    print("writer finished...")

    time.sleep(10)


if __name__ == "__main__":
    asyncio.run(main())
@ -1,7 +1,6 @@
from typing import List
from cognee.infrastructure.engine import DataPoint
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
import cognee
from cognee.infrastructure.databases.graph import get_graph_engine
import json

@ -64,7 +63,6 @@ async def create_connected_test_graph():


async def get_metrics(provider: str, include_optional=True):
create_graph_engine.cache_clear()
cognee.config.set_graph_database_provider(provider)
graph_engine = await get_graph_engine()
await graph_engine.delete_graph()
@ -1,7 +1,12 @@
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import assert_metrics
import asyncio


async def main():
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import assert_metrics

await assert_metrics(provider="neo4j", include_optional=False)
await assert_metrics(provider="neo4j", include_optional=True)


if __name__ == "__main__":
asyncio.run(assert_metrics(provider="neo4j", include_optional=False))
asyncio.run(assert_metrics(provider="neo4j", include_optional=True))
asyncio.run(main())
84
cognee/tests/test_concurrent_subprocess_access.py
Normal file

@ -0,0 +1,84 @@
import os
import asyncio
import cognee
import pathlib
import subprocess

from cognee.shared.logging_utils import get_logger

logger = get_logger()

"""
Test: Redis-based Kùzu Locking Across Subprocesses

This test ensures the Redis shared lock correctly serializes access to the Kùzu
database when multiple subprocesses (writer/reader and cognify tasks) run in parallel.
If this test fails, it indicates the locking mechanism is not properly handling
concurrent subprocess access.
"""


async def concurrent_subprocess_access():
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/concurrent_tasks")
        ).resolve()
    )
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/concurrent_tasks")
        ).resolve()
    )

    subprocess_directory_path = str(
        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, "subprocesses/")).resolve()
    )

    writer_path = subprocess_directory_path + "/writer.py"
    reader_path = subprocess_directory_path + "/reader.py"

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    writer_process = subprocess.Popen([os.sys.executable, str(writer_path)])

    reader_process = subprocess.Popen([os.sys.executable, str(reader_path)])

    # Wait for both processes to complete
    writer_process.wait()
    reader_process.wait()

    logger.info("Basic write read subprocess example finished")

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    text = """
    This is the text of the first cognify subprocess
    """
    await cognee.add(text, dataset_name="first_cognify_dataset")

    text = """
    This is the text of the second cognify subprocess
    """
    await cognee.add(text, dataset_name="second_cognify_dataset")

    first_cognify_path = subprocess_directory_path + "/simple_cognify_1.py"
    second_cognify_path = subprocess_directory_path + "/simple_cognify_2.py"

    first_cognify_process = subprocess.Popen([os.sys.executable, str(first_cognify_path)])

    second_cognify_process = subprocess.Popen([os.sys.executable, str(second_cognify_path)])

    # Wait for both processes to complete
    first_cognify_process.wait()
    second_cognify_process.wait()

    logger.info("Database concurrent subprocess example finished")


if __name__ == "__main__":
    asyncio.run(concurrent_subprocess_access())
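
The test depends on a Redis-backed lock to serialize Kùzu access between the writer, reader, and the two cognify subprocesses. The locking helper itself is internal to cognee and not shown in this diff, but the idea can be sketched with redis-py's built-in lock, using illustrative values for the key and timeouts:

    import redis

    def with_shared_kuzu_lock(do_work):
        # Connection details mirror the CacheConfig defaults tested below (localhost:6379).
        client = redis.Redis(host="localhost", port=6379)
        # A single well-known key guards the shared Kuzu database directory.
        lock = client.lock("kuzu:test.db", timeout=240, blocking_timeout=300)
        with lock:
            # Only one process at a time gets past this point; others block until release.
            return do_work()
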
@ -1,105 +0,0 @@
import os

import pathlib
import cognee
from cognee.infrastructure.files.storage import get_storage_config
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType


logger = get_logger()


async def main():
    cognee.config.set_graph_database_provider("memgraph")
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_memgraph")
        ).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_memgraph")
        ).resolve()
    )
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name = "cs_explanations"

    explanation_file_path_nlp = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
    )
    await cognee.add([explanation_file_path_nlp], dataset_name)

    explanation_file_path_quantum = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
    )

    await cognee.add([explanation_file_path_quantum], dataset_name)

    await cognee.cognify([dataset_name])

    from cognee.infrastructure.databases.vector import get_vector_engine

    vector_engine = get_vector_engine()
    random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
    random_node_name = random_node.payload["text"]

    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
    )
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted chunks are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(
        query_type=SearchType.SUMMARIES, query_text=random_node_name
    )
    assert len(search_results) != 0, "Query related summaries don't exist."
    print("\nExtracted results are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(
        query_type=SearchType.NATURAL_LANGUAGE,
        query_text=f"Find nodes connected to node with name {random_node_name}",
    )
    assert len(search_results) != 0, "Query related natural language don't exist."
    print("\nExtracted results are:\n")
    for result in search_results:
        print(f"{result}\n")

    user = await get_default_user()
    history = await get_history(user.id)

    assert len(history) == 8, "Search history is not correct."

    await cognee.prune.prune_data()
    data_root_directory = get_storage_config()["data_root_directory"]
    assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

    await cognee.prune.prune_system(metadata=True)
    from cognee.infrastructure.databases.graph import get_graph_engine

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 0 and len(edges) == 0, "Memgraph graph database is not empty"


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
87
cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py
vendored
Normal file

@ -0,0 +1,87 @@
"""Tests for cache configuration."""

import pytest
from cognee.infrastructure.databases.cache.config import CacheConfig, get_cache_config


def test_cache_config_defaults():
    """Test that CacheConfig has the correct default values."""
    config = CacheConfig()

    assert config.caching is False
    assert config.shared_kuzu_lock is False
    assert config.cache_host == "localhost"
    assert config.cache_port == 6379
    assert config.agentic_lock_expire == 240
    assert config.agentic_lock_timeout == 300


def test_cache_config_custom_values():
    """Test that CacheConfig accepts custom values."""
    config = CacheConfig(
        caching=True,
        shared_kuzu_lock=True,
        cache_host="redis.example.com",
        cache_port=6380,
        agentic_lock_expire=120,
        agentic_lock_timeout=180,
    )

    assert config.caching is True
    assert config.shared_kuzu_lock is True
    assert config.cache_host == "redis.example.com"
    assert config.cache_port == 6380
    assert config.agentic_lock_expire == 120
    assert config.agentic_lock_timeout == 180


def test_cache_config_to_dict():
    """Test the to_dict method returns all configuration values."""
    config = CacheConfig(
        caching=True,
        shared_kuzu_lock=True,
        cache_host="test-host",
        cache_port=7000,
        agentic_lock_expire=100,
        agentic_lock_timeout=200,
    )

    config_dict = config.to_dict()

    assert config_dict == {
        "caching": True,
        "shared_kuzu_lock": True,
        "cache_host": "test-host",
        "cache_port": 7000,
        "agentic_lock_expire": 100,
        "agentic_lock_timeout": 200,
    }


def test_get_cache_config_singleton():
    """Test that get_cache_config returns the same instance."""
    config1 = get_cache_config()
    config2 = get_cache_config()

    assert config1 is config2


def test_cache_config_extra_fields_allowed():
    """Test that CacheConfig allows extra fields due to extra='allow'."""
    config = CacheConfig(extra_field="extra_value", another_field=123)

    assert hasattr(config, "extra_field")
    assert config.extra_field == "extra_value"
    assert hasattr(config, "another_field")
    assert config.another_field == 123


def test_cache_config_boolean_type_validation():
    """Test that boolean fields accept various truthy/falsy values."""
    config1 = CacheConfig(caching="true", shared_kuzu_lock="yes")
    assert config1.caching is True
    assert config1.shared_kuzu_lock is True

    config2 = CacheConfig(caching="false", shared_kuzu_lock="no")
    assert config2.caching is False
    assert config2.shared_kuzu_lock is False
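
For context, get_cache_config is what cognee code would consult before reaching for Redis; based only on the fields exercised in these tests, a typical lookup might look like this (a sketch, not the library's actual call site):

    from cognee.infrastructure.databases.cache.config import get_cache_config

    config = get_cache_config()
    if config.caching or config.shared_kuzu_lock:
        # Host and port default to localhost:6379 unless overridden via configuration.
        print(f"Expecting Redis at {config.cache_host}:{config.cache_port}")
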
@ -129,6 +129,30 @@ services:
    networks:
      - cognee-network

  redis:
    image: redis:7-alpine
    container_name: redis
    profiles:
      - redis
    ports:
      - "6379:6379"
    networks:
      - cognee-network
    volumes:
      - redis_data:/data
    command: [ "redis-server", "--appendonly", "yes" ]


  redisinsight:
    image: redislabs/redisinsight:latest
    container_name: redisinsight
    restart: always
    ports:
      - "5540:5540"
    networks:
      - cognee-network


networks:
  cognee-network:
    name: cognee-network

@ -136,3 +160,4 @@ networks:
volumes:
  chromadb_data:
  postgres_data:
  redis_data:
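
The redis service sits behind a compose profile, so it only starts when that profile is enabled (for example with docker compose --profile redis up). Once it is running, a quick connectivity check against the published port can be done from Python; a sketch assuming the default localhost port mapping shown above:

    import redis

    # The compose file publishes 6379:6379, so a host-side client connects to localhost.
    client = redis.Redis(host="localhost", port=6379)
    assert client.ping()  # Raises a connection error if the redis profile is not running.
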
82
notebooks/neptune-analytics-example.ipynb
vendored

@ -83,16 +83,16 @@
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"from cognee import config, add, cognify, search, SearchType, prune, visualize_graph\n",
"from dotenv import load_dotenv"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "markdown",

@ -106,7 +106,9 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load environment variables from file .env\n",
"load_dotenv()\n",

@ -145,9 +147,7 @@
" \"vector_db_url\": f\"neptune-graph://{graph_identifier}\", # Neptune Analytics endpoint with the format neptune-graph://<GRAPH_ID>\n",
" }\n",
")"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "markdown",

@ -159,19 +159,19 @@
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Prune data and system metadata before running, only if we want \"fresh\" state.\n",
"await prune.prune_data()\n",
"await prune.prune_system(metadata=True)"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup data and cognify\n",
"\n",

@ -180,7 +180,9 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add sample text to the dataset\n",
"sample_text_1 = \"\"\"Neptune Analytics is a memory-optimized graph database engine for analytics. With Neptune\n",

@ -205,9 +207,7 @@
"\n",
"# Cognify the text data.\n",
"await cognify([dataset_name])"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "markdown",

@ -215,14 +215,16 @@
"source": [
"## Graph Memory visualization\n",
"\n",
"Initialize Memgraph as a Graph Memory store and save to .artefacts/graph_visualization.html\n",
"Initialize Neptune as a Graph Memory store and save to .artefacts/graph_visualization.html\n",
"\n",
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get a graphistry url (Register for a free account at https://www.graphistry.com)\n",
"# url = await render_graph()\n",

@ -235,9 +237,7 @@
" ).resolve()\n",
")\n",
"await visualize_graph(graph_file_path)"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "markdown",

@ -250,19 +250,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Completion query that uses graph data to form context.\n",
"graph_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.GRAPH_COMPLETION)\n",
"print(\"\\nGraph completion result is:\")\n",
"print(graph_completion)"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## SEARCH: RAG Completion\n",
"\n",

@ -271,19 +271,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Completion query that uses document chunks to form context.\n",
"rag_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.RAG_COMPLETION)\n",
"print(\"\\nRAG Completion result is:\")\n",
"print(rag_completion)"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## SEARCH: Graph Insights\n",
"\n",

@ -291,8 +291,10 @@
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search graph insights\n",
"insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.GRAPH_COMPLETION)\n",

@ -302,13 +304,11 @@
" tgt_node = result[2].get(\"name\", result[2][\"type\"])\n",
" relationship = result[1].get(\"relationship_name\", \"__relationship__\")\n",
" print(f\"- {src_node} -[{relationship}]-> {tgt_node}\")"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## SEARCH: Entity Summaries\n",
"\n",

@ -316,8 +316,10 @@
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Query all summaries related to query.\n",
"summaries = await search(query_text=\"Neptune Analytics\", query_type=SearchType.SUMMARIES)\n",

@ -326,13 +328,11 @@
" type = summary[\"type\"]\n",
" text = summary[\"text\"]\n",
" print(f\"- {type}: {text}\")"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## SEARCH: Chunks\n",
"\n",

@ -340,8 +340,10 @@
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"chunks = await search(query_text=\"Neptune Analytics\", query_type=SearchType.CHUNKS)\n",
"print(\"\\nChunk results are:\")\n",

@ -349,9 +351,7 @@
" type = chunk[\"type\"]\n",
" text = chunk[\"text\"]\n",
" print(f\"- {type}: {text}\")"
],
"outputs": [],
"execution_count": null
]
}
],
"metadata": {
1319
poetry.lock
generated
File diff suppressed because it is too large
@ -7,7 +7,7 @@ authors = [
{ name = "Vasilije Markovic" },
{ name = "Boris Arzentar" },
]
requires-python = ">=3.10,<=3.13"
requires-python = ">=3.10,<3.14"
readme = "README.md"
license = "Apache-2.0"
classifiers = [

@ -56,6 +56,7 @@ dependencies = [
"gunicorn>=20.1.0,<24",
"websockets>=15.0.1,<16.0.0",
"mistralai>=1.9.10",
"tenacity>=9.0.0",
]

[project.optional-dependencies]

@ -64,14 +65,16 @@ api=[]
distributed = [
"modal>=1.0.5,<2.0.0",
]

scraping = [
"tavily-python>=0.7.0",
"tavily-python>=0.7.12",
"beautifulsoup4>=4.13.1",
"playwright>=1.9.0",
"lxml>=4.9.3,<5.0.0",
"lxml>=4.9.3",
"protego>=0.1",
"APScheduler>=3.10.0,<=3.11.0"
]

neo4j = ["neo4j>=5.28.0,<6"]
neptune = ["langchain_aws>=0.2.22"]
postgres = [

@ -101,7 +104,7 @@ chromadb = [
"chromadb>=0.6,<0.7",
"pypika==0.48.9",
]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"]
docs = ["lxml<6.0.0", "unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"]
codegraph = [
"fastembed<=0.6.0 ; python_version < '3.13'",
"transformers>=4.46.3,<5",

@ -140,6 +143,7 @@ dev = [
"mkdocstrings[python]>=0.26.2,<0.27",
]
debug = ["debugpy>=1.8.9,<2.0.0"]
redis = ["redis>=5.0.3,<6.0.0"]

monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"]
31
working_dir_error_replication/run_subprocess_test.py
Normal file

@ -0,0 +1,31 @@
"""
Run writer and reader in separate subprocesses to test Kuzu locks.
"""

import subprocess
import time
import os


def main():
    print("=== Kuzu Subprocess Lock Test ===")
    print("Starting writer and reader in separate subprocesses...")
    print("Writer will hold the database lock, reader should block or fail\n")

    start_time = time.time()

    # Start writer subprocess
    writer_process = subprocess.Popen([os.sys.executable, "writer.py"])

    reader_process = subprocess.Popen([os.sys.executable, "reader.py"])

    # Wait for both processes to complete
    writer_process.wait()
    reader_process.wait()

    total_time = time.time() - start_time
    print(f"\nTotal execution time: {total_time:.2f}s")


if __name__ == "__main__":
    main()