Merge branch 'dev' into feat/add-pdfproloader

Signed-off-by: EricXiao <taoiaox@gmail.com>
2025-09-20 17:26:03 +08:00 · 2025-09-20 17:26:03 +08:00 · d12ec0bc4f
commit d12ec0bc4f
parent 6107cb47ca 96eb0d448a
56 changed files with 1303 additions and 1126 deletions
--- a/.env.template
+++ b/.env.template
@ -116,7 +116,15 @@ VECTOR_DB_PROVIDER="lancedb"
 VECTOR_DB_URL=
 VECTOR_DB_KEY=

+################################################################################
+# 🧩 Ontology resolver settings
+################################################################################

+# -- Ontology resolver params --------------------------------------
+# ONTOLOGY_RESOLVER=rdflib  # Default: uses rdflib and owl file to read ontology structures
+# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold
+# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
+# To add an ontology resolver, either configure it as shown in ontology_example, or set the full file path and the settings above as environment variables.

 ################################################################################
 #  🔄  MIGRATION (RELATIONAL → GRAPH) SETTINGS
--- a/.github/workflows/disable_independent_workflows.sh
+++ b/.github/workflows/disable_independent_workflows.sh
@ -10,7 +10,7 @@ WORKFLOWS=(
  "test_kuzu.yml"
  "test_multimetric_qa_eval_run.yaml"
  "test_graphrag_vs_rag_notebook.yml"
-  "test_gemini.yml"
+  "test_llms.yml"
  "test_multimedia_example.yaml"
  "test_deduplication.yml"
  "test_eval_framework.yml"
--- a/.github/workflows/test_gemini.yml
+++ b/.github/workflows/test_gemini.yml
@ -1,29 +0,0 @@
-name: test | gemini
-
-on:
-  workflow_call:
-
-jobs:
-  test-gemini:
-    name: Run Gemini Test
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-
-      - name: Run Gemini Simple Example
-        env:
-          LLM_PROVIDER: "gemini"
-          LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          LLM_MODEL: "gemini/gemini-1.5-flash"
-          EMBEDDING_PROVIDER: "gemini"
-          EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          EMBEDDING_MODEL: "gemini/text-embedding-004"
-          EMBEDDING_DIMENSIONS: "768"
-          EMBEDDING_MAX_TOKENS: "8076"
-        run: uv run python ./examples/python/simple_example.py
--- a/.github/workflows/test_llms.yml
+++ b/.github/workflows/test_llms.yml
@ -0,0 +1,86 @@
+name: LLM Test Suites
+
+permissions:
+  contents: read
+
+on:
+  workflow_call:
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
+
+jobs:
+  test-gemini:
+    name: Run Gemini Test
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run Gemini Simple Example
+        env:
+          LLM_PROVIDER: "gemini"
+          LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          LLM_MODEL: "gemini/gemini-1.5-flash"
+          EMBEDDING_PROVIDER: "gemini"
+          EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          EMBEDDING_MODEL: "gemini/text-embedding-004"
+          EMBEDDING_DIMENSIONS: "768"
+          EMBEDDING_MAX_TOKENS: "8076"
+        run: uv run python ./examples/python/simple_example.py
+
+  test-fastembed:
+    name: Run Fastembed Test
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run Fastembed Simple Example
+        env:
+          LLM_PROVIDER: "openai"
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_PROVIDER: "fastembed"
+          EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
+          EMBEDDING_DIMENSIONS: "384"
+          EMBEDDING_MAX_TOKENS: "256"
+        run: uv run python ./examples/python/simple_example.py
+
+  test-openrouter:
+    name: Run OpenRouter Test
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run OpenRouter Simple Example
+        env:
+          LLM_PROVIDER: "custom"
+          LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
+          LLM_ENDPOINT: "https://openrouter.ai/api/v1"
+          EMBEDDING_PROVIDER: "openai"
+          EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          EMBEDDING_MODEL: "openai/text-embedding-3-large"
+          EMBEDDING_DIMENSIONS: "3072"
+          EMBEDDING_MAX_TOKENS: "8191"
+        run: uv run python ./examples/python/simple_example.py
--- a/.github/workflows/test_openrouter.yml
+++ b/.github/workflows/test_openrouter.yml
@ -1,30 +0,0 @@
-name: test | openrouter
-
-on:
-  workflow_call:
-
-jobs:
-  test-openrouter:
-    name: Run OpenRouter Test
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-
-      - name: Run OpenRouter Simple Example
-        env:
-          LLM_PROVIDER: "custom"
-          LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
-          LLM_ENDPOINT: "https://openrouter.ai/api/v1"
-          EMBEDDING_PROVIDER: "openai"
-          EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          EMBEDDING_MODEL: "openai/text-embedding-3-large"
-          EMBEDDING_DIMENSIONS: "3072"
-          EMBEDDING_MAX_TOKENS: "8191"
-        run: uv run python ./examples/python/simple_example.py
--- a/.github/workflows/test_suites.yml
+++ b/.github/workflows/test_suites.yml
@ -115,16 +115,10 @@ jobs:
    secrets: inherit

  # Additional LLM tests
-  gemini-tests:
-    name: Gemini Tests
-    needs: [basic-tests, e2e-tests]
-    uses: ./.github/workflows/test_gemini.yml
-    secrets: inherit
-
-  openrouter-tests:
-    name: OpenRouter Tests
-    needs: [basic-tests, e2e-tests]
-    uses: ./.github/workflows/test_openrouter.yml
+  llm-tests:
+    name: LLM Test Suite
+    needs: [ basic-tests, e2e-tests ]
+    uses: ./.github/workflows/test_llms.yml
    secrets: inherit

  # Ollama tests moved to the end
@ -138,8 +132,7 @@ jobs:
      different-operating-systems-tests,
      vector-db-tests,
      example-tests,
-      gemini-tests,
-      openrouter-tests,
+      llm-tests,
      mcp-test,
      relational-db-migration-tests,
      docker-compose-test,
@ -161,8 +154,7 @@ jobs:
      example-tests,
      db-examples-tests,
      mcp-test,
-      gemini-tests,
-      openrouter-tests,
+      llm-tests,
      ollama-tests,
      relational-db-migration-tests,
      docker-compose-test,
@ -183,8 +175,7 @@ jobs:
                "${{ needs.example-tests.result }}" == "success" &&
                "${{ needs.db-examples-tests.result }}" == "success" &&
                "${{ needs.relational-db-migration-tests.result }}" == "success" &&
-                "${{ needs.gemini-tests.result }}" == "success" &&
-                "${{ needs.openrouter-tests.result }}" == "success" &&
+                "${{ needs.llm-tests.result }}" == "success" &&
                "${{ needs.docker-compose-test.result }}" == "success" &&
                "${{ needs.docker-ci-test.result }}" == "success" &&
                "${{ needs.ollama-tests.result }}" == "success" ]]; then
--- a/README.md
+++ b/README.md
@ -176,16 +176,6 @@ You can also cognify your files and query using cognee UI.

 <img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>

-### Installation for UI
-
-To use the cognee UI with full functionality, you need to install cognee with API dependencies:
-
-```bash
-pip install 'cognee[api]'
-```
-
-The UI requires backend server functionality (uvicorn and other API dependencies) which are not included in the default cognee installation to keep it lightweight.
-
 ### Running the UI

 Try cognee UI by running ``` cognee-cli -ui ``` command on your terminal.
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@ -3,6 +3,7 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID

+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens
@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
 from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (
@ -39,7 +44,7 @@ async def cognify(
    graph_model: BaseModel = KnowledgeGraph,
    chunker=TextChunker,
    chunk_size: int = None,
-    ontology_file_path: Optional[str] = None,
+    config: Config = None,
    vector_db_config: dict = None,
    graph_db_config: dict = None,
    run_in_background: bool = False,
@ -100,8 +105,6 @@ async def cognify(
                   Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
                   Default limits: ~512-8192 tokens depending on models.
                   Smaller chunks = more granular but potentially fragmented knowledge.
-        ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
-                          Useful for specialized fields like medical or legal documents.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        run_in_background: If True, starts processing asynchronously and returns immediately.
@ -188,11 +191,28 @@ async def cognify(
        - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
        - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
    """
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
    if temporal_cognify:
        tasks = await get_temporal_tasks(user, chunker, chunk_size)
    else:
        tasks = await get_default_tasks(
-            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+            user, graph_model, chunker, chunk_size, config, custom_prompt
        )

    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@ -216,9 +236,26 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
    graph_model: BaseModel = KnowledgeGraph,
    chunker=TextChunker,
    chunk_size: int = None,
-    ontology_file_path: Optional[str] = None,
+    config: Config = None,
    custom_prompt: Optional[str] = None,
 ) -> list[Task]:
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
    default_tasks = [
        Task(classify_documents),
        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@ -230,7 +267,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
        Task(
            extract_graph_from_data,
            graph_model=graph_model,
-            ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            config=config,
            custom_prompt=custom_prompt,
            task_config={"batch_size": 10},
        ),  # Generate knowledge graphs from the document chunks.
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@ -82,6 +82,9 @@ async def search(
            Best for: General-purpose queries or when you're unsure which search type is best.
            Returns: The results from the automatically selected search type.

+        **CHUNKS_LEXICAL**:
+            Token-based lexical chunk search (e.g., Jaccard). Best for: exact-term matching, stopword-aware lookups.
+            Returns: Ranked text chunks (optionally with scores).

    Args:
        query_text: Your question or search query in natural language.
--- a/cognee/base_config.py
+++ b/cognee/base_config.py
@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
    data_root_directory: str = get_absolute_path(".data_storage")
    system_root_directory: str = get_absolute_path(".cognee_system")
    cache_root_directory: str = get_absolute_path(".cognee_cache")
-    monitoring_tool: object = Observer.LANGFUSE
+    monitoring_tool: object = Observer.NONE

    @pydantic.model_validator(mode="after")
    def validate_paths(self):
@ -30,7 +30,10 @@ class BaseConfig(BaseSettings):
        # Require absolute paths for root directories
        self.data_root_directory = ensure_absolute_path(self.data_root_directory)
        self.system_root_directory = ensure_absolute_path(self.system_root_directory)
-        self.cache_root_directory = ensure_absolute_path(self.cache_root_directory)
+        # Set monitoring tool based on available keys
+        if self.langfuse_public_key and self.langfuse_secret_key:
+            self.monitoring_tool = Observer.LANGFUSE
+
        return self

    langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
--- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver


 async def get_default_tasks_by_indices(
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)

-    ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
+    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)

    graph_task = Task(
        extract_graph_from_data,
--- a/cognee/modules/data/methods/create_authorized_dataset.py
+++ b/cognee/modules/data/methods/create_authorized_dataset.py
@ -6,6 +6,15 @@ from .create_dataset import create_dataset


 async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
+    """
+    Create a new dataset and give all permissions on this dataset to the given user.
+    Args:
+        dataset_name: Name of the dataset.
+        user: The user object.
+
+    Returns:
+        Dataset: The new authorized dataset.
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/data/methods/get_authorized_dataset.py
+++ b/cognee/modules/data/methods/get_authorized_dataset.py
@ -15,7 +15,7 @@ async def get_authorized_dataset(
    Get a specific dataset with permissions for a user.

    Args:
-        user_id (UUID): user id
+        user: User object
        dataset_id (UUID): dataset id
        permission_type (str): permission type(read, write, delete, share), default is read

--- a/cognee/modules/data/methods/get_authorized_dataset_by_name.py
+++ b/cognee/modules/data/methods/get_authorized_dataset_by_name.py
@ -11,6 +11,17 @@ from ..models import Dataset
 async def get_authorized_dataset_by_name(
    dataset_name: str, user: User, permission_type: str
 ) -> Optional[Dataset]:
+    """
+    Get a specific dataset with the given name, with permissions for a given user.
+
+    Args:
+        dataset_name: Name of the dataset.
+        user: User object.
+        permission_type (str): permission type(read, write, delete, share), default is read
+
+    Returns:
+        Optional[Dataset]: dataset with permissions
+    """
    authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)

    return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)
--- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
+++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
    generate_node_id,
    generate_node_name,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)


 def _create_node_key(node_id: str, category: str) -> str:
@ -83,7 +89,7 @@ def _process_ontology_edges(

 def _create_type_node(
    node_type: str,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
@ -141,7 +147,7 @@ def _create_entity_node(
    node_name: str,
    node_description: str,
    type_node: EntityType,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
@ -198,7 +204,7 @@ def _create_entity_node(
 def _process_graph_nodes(
    data_chunk: DocumentChunk,
    graph: KnowledgeGraph,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
@ -277,7 +283,7 @@ def _process_graph_edges(
 def expand_with_nodes_and_edges(
    data_chunks: list[DocumentChunk],
    chunk_graphs: list[KnowledgeGraph],
-    ontology_resolver: OntologyResolver = None,
+    ontology_resolver: BaseOntologyResolver = None,
    existing_edges_map: Optional[dict[str, bool]] = None,
 ):
    """
@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
        chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
            data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
            from the chunk content.
-        ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
-            types against an ontology. If None, a default OntologyResolver is created.
+        ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
+            types against an ontology. If None, built from env settings, else a default is used.
            Defaults to None.
        existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
            duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
        existing_edges_map = {}

    if ontology_resolver is None:
-        ontology_resolver = OntologyResolver()
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
+        else:
+            ontology_resolver = get_default_ontology_resolver()

    added_nodes_map = {}
    added_ontology_nodes_map = {}
--- a/cognee/modules/graph/utils/retrieve_existing_edges.py
+++ b/cognee/modules/graph/utils/retrieve_existing_edges.py
@ -23,8 +23,6 @@ async def retrieve_existing_edges(
        chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
            data chunk. Each graph contains nodes (entities) and edges (relationships) that
            were extracted from the chunk content.
-        graph_engine (GraphDBInterface): Interface to the graph database that will be queried
-            to check for existing edges. Must implement the has_edges() method.

    Returns:
        dict[str, bool]: A mapping of edge keys to boolean values indicating existence.
--- a/cognee/modules/observability/get_observe.py
+++ b/cognee/modules/observability/get_observe.py
@ -9,3 +9,17 @@ def get_observe():
        from langfuse.decorators import observe

        return observe
+    elif monitoring == Observer.NONE:
+        # Return a no-op decorator that handles keyword arguments
+        def no_op_decorator(*args, **kwargs):
+            if len(args) == 1 and callable(args[0]) and not kwargs:
+                # Direct decoration: @observe
+                return args[0]
+            else:
+                # Parameterized decoration: @observe(as_type="generation")
+                def decorator(func):
+                    return func
+
+                return decorator
+
+        return no_op_decorator
--- a/cognee/modules/observability/observers.py
+++ b/cognee/modules/observability/observers.py
@ -4,6 +4,7 @@ from enum import Enum
 class Observer(str, Enum):
    """Monitoring tools"""

+    NONE = "none"
    LANGFUSE = "langfuse"
    LLMLITE = "llmlite"
    LANGSMITH = "langsmith"
--- a/cognee/modules/ontology/base_ontology_resolver.py
+++ b/cognee/modules/ontology/base_ontology_resolver.py
@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Optional
+
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
+
+
+class BaseOntologyResolver(ABC):
+    """Abstract base class for ontology resolvers."""
+
+    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
+        """Initialize the ontology resolver with a matching strategy.
+
+        Args:
+            matching_strategy: The strategy to use for entity matching.
+                              Defaults to FuzzyMatchingStrategy if None.
+        """
+        self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
+
+    @abstractmethod
+    def build_lookup(self) -> None:
+        """Build the lookup dictionary for ontology entities."""
+        pass
+
+    @abstractmethod
+    def refresh_lookup(self) -> None:
+        """Refresh the lookup dictionary."""
+        pass
+
+    @abstractmethod
+    def find_closest_match(self, name: str, category: str) -> Optional[str]:
+        """Find the closest match for a given name in the specified category."""
+        pass
+
+    @abstractmethod
+    def get_subgraph(
+        self, node_name: str, node_type: str = "individuals", directed: bool = True
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
+        """Get a subgraph for the given node."""
+        pass
--- a/cognee/modules/ontology/get_default_ontology_resolver.py
+++ b/cognee/modules/ontology/get_default_ontology_resolver.py
@ -0,0 +1,41 @@
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+
+def get_default_ontology_resolver() -> BaseOntologyResolver:
+    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())
+
+
+def get_ontology_resolver_from_env(
+    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
+) -> BaseOntologyResolver:
+    """
+    Create and return an ontology resolver instance based on environment parameters.
+
+    Currently, this function supports only the RDFLib-based ontology resolver
+    with a fuzzy matching strategy.
+
+    Args:
+        ontology_resolver (str): The ontology resolver type to use.
+            Supported value: "rdflib".
+        matching_strategy (str): The matching strategy to apply.
+            Supported value: "fuzzy".
+        ontology_file_path (str): Path to the ontology file required for the resolver.
+
+    Returns:
+        BaseOntologyResolver: An instance of the requested ontology resolver.
+
+    Raises:
+        EnvironmentError: If the provided resolver or strategy is unsupported,
+            or if required parameters are missing.
+    """
+    if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
+        return RDFLibOntologyResolver(
+            matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
+        )
+    else:
+        raise EnvironmentError(
+            f"Unsupported ontology resolver: {ontology_resolver}. "
+            f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
+        )
--- a/cognee/modules/ontology/matching_strategies.py
+++ b/cognee/modules/ontology/matching_strategies.py
@ -0,0 +1,53 @@
+import difflib
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+
+class MatchingStrategy(ABC):
+    """Abstract base class for ontology entity matching strategies."""
+
+    @abstractmethod
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the best match for a given name from a list of candidates.
+
+        Args:
+            name: The name to match
+            candidates: List of candidate names to match against
+
+        Returns:
+            The best matching candidate name, or None if no match found
+        """
+        pass
+
+
+class FuzzyMatchingStrategy(MatchingStrategy):
+    """Fuzzy matching strategy using difflib for approximate string matching."""
+
+    def __init__(self, cutoff: float = 0.8):
+        """Initialize fuzzy matching strategy.
+
+        Args:
+            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
+        """
+        self.cutoff = cutoff
+
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the closest fuzzy match for a given name.
+
+        Args:
+            name: The normalized name to match
+            candidates: List of normalized candidate names
+
+        Returns:
+            The best matching candidate name, or None if no match meets the cutoff
+        """
+        if not candidates:
+            return None
+
+        # Check for exact match first
+        if name in candidates:
+            return name
+
+        # Find fuzzy match
+        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
+        return best_match[0] if best_match else None
--- a/cognee/modules/ontology/models.py
+++ b/cognee/modules/ontology/models.py
@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+    def __init__(self, uri: Any, category: str):
+        self.uri = uri
+        self.name = self._extract_name(uri)
+        self.category = category
+
+    @staticmethod
+    def _extract_name(uri: Any) -> str:
+        uri_str = str(uri)
+        if "#" in uri_str:
+            return uri_str.split("#")[-1]
+        return uri_str.rstrip("/").split("/")[-1]
+
+    def __repr__(self):
+        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
--- a/cognee/modules/ontology/ontology_config.py
+++ b/cognee/modules/ontology/ontology_config.py
@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+    """Configuration containing ontology resolver.
+
+    Attributes:
+        ontology_resolver: The ontology resolver instance to use
+    """
+
+    ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+    """Top-level configuration dictionary.
+
+    Attributes:
+        ontology_config: Configuration containing ontology resolver
+    """
+
+    ontology_config: Optional[OntologyConfig]
--- a/cognee/modules/ontology/ontology_env_config.py
+++ b/cognee/modules/ontology/ontology_env_config.py
@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+    """
+    Represents the configuration for ontology handling, including parameters for
+    ontology file storage and resolution/matching strategies.
+
+    Public methods:
+    - to_dict
+
+    Instance variables:
+    - ontology_resolver
+    - matching_strategy
+    - ontology_file_path
+    - model_config
+    """
+
+    ontology_resolver: str = "rdflib"
+    matching_strategy: str = "fuzzy"
+    ontology_file_path: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+    def to_dict(self) -> dict:
+        """
+        Return the configuration as a dictionary.
+        """
+        return {
+            "ontology_resolver": self.ontology_resolver,
+            "matching_strategy": self.matching_strategy,
+            "ontology_file_path": self.ontology_file_path,
+        }
+
+
+@lru_cache
+def get_ontology_env_config():
+    """
+    Retrieve the ontology configuration. This function utilizes caching to return a
+    singleton instance of the OntologyEnvConfig class for efficiency.
+    """
+    return OntologyEnvConfig()
--- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
+++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
    FindClosestMatchError,
    GetSubgraphError,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy

 logger = get_logger("OntologyAdapter")


-class AttachedOntologyNode:
-    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+class RDFLibOntologyResolver(BaseOntologyResolver):
+    """RDFLib-based ontology resolver implementation.

-    def __init__(self, uri: URIRef, category: str):
-        self.uri = uri
-        self.name = self._extract_name(uri)
-        self.category = category
+    This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
+    It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
+    """

-    @staticmethod
-    def _extract_name(uri: URIRef) -> str:
-        uri_str = str(uri)
-        if "#" in uri_str:
-            return uri_str.split("#")[-1]
-        return uri_str.rstrip("/").split("/")[-1]
-
-    def __repr__(self):
-        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
-
-
-class OntologyResolver:
-    def __init__(self, ontology_file: Optional[str] = None):
+    def __init__(
+        self,
+        ontology_file: Optional[str] = None,
+        matching_strategy: Optional[MatchingStrategy] = None,
+    ) -> None:
+        super().__init__(matching_strategy)
        self.ontology_file = ontology_file
        try:
            if ontology_file and os.path.exists(ontology_file):
@ -60,7 +55,7 @@ class OntologyResolver:
            name = uri_str.rstrip("/").split("/")[-1]
        return name.lower().replace(" ", "_").strip()

-    def build_lookup(self):
+    def build_lookup(self) -> None:
        try:
            classes: Dict[str, URIRef] = {}
            individuals: Dict[str, URIRef] = {}
@ -97,7 +92,7 @@ class OntologyResolver:
            logger.error("Failed to build lookup dictionary: %s", str(e))
            raise RuntimeError("Lookup build failed") from e

-    def refresh_lookup(self):
+    def refresh_lookup(self) -> None:
        self.build_lookup()
        logger.info("Ontology lookup refreshed.")

@ -105,13 +100,8 @@ class OntologyResolver:
        try:
            normalized_name = name.lower().replace(" ", "_").strip()
            possible_matches = list(self.lookup.get(category, {}).keys())
-            if normalized_name in possible_matches:
-                return normalized_name

-            best_match = difflib.get_close_matches(
-                normalized_name, possible_matches, n=1, cutoff=0.8
-            )
-            return best_match[0] if best_match else None
+            return self.matching_strategy.find_match(normalized_name, possible_matches)
        except Exception as e:
            logger.error("Error in find_closest_match: %s", str(e))
            raise FindClosestMatchError() from e
@ -125,7 +115,9 @@ class OntologyResolver:

    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
-    ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
        nodes_set = set()
        edges: List[Tuple[str, str, str]] = []
        visited = set()
--- a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
@ -11,6 +11,19 @@ from cognee.modules.data.methods import (


 async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
+    """
+    Handles dataset creation and authorization: an existing dataset is authorized, a missing one is created.
+    Verifies that provided user has necessary permission for provided Dataset.
+    If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset.
+
+    Args:
+        dataset_id: Id of the dataset.
+        dataset_name: Name of the dataset.
+        user: Cognee User request is being processed for, if None default user will be used.
+
+    Returns:
+        Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
+    """
    if not user:
        user = await get_default_user()

--- a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
        datasets: Dataset names or Dataset UUID (in case Datasets already exist)

    Returns:
-
+        Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
    """
    # If no user is provided use default user
    if user is None:
--- a/cognee/modules/retrieval/jaccard_retrival.py
+++ b/cognee/modules/retrieval/jaccard_retrival.py
@ -0,0 +1,56 @@
+from cognee.modules.retrieval.lexical_retriever import LexicalRetriever
+import re
+from collections import Counter
+from typing import Optional
+class JaccardChunksRetriever(LexicalRetriever):
+    """LexicalRetriever specialization that ranks chunks by Jaccard similarity."""
+
+    def __init__(
+        self,
+        top_k: int = 10,
+        with_scores: bool = False,
+        stop_words: Optional[list[str]] = None,
+        multiset_jaccard: bool = False,
+    ):
+        """
+        Parameters
+        ----------
+        top_k : int
+            Maximum number of results to return.
+        with_scores : bool
+            When True, results are (payload, score) pairs; otherwise payloads only.
+        stop_words : list[str], optional
+            Tokens to drop during tokenization (stored in lowercase).
+        multiset_jaccard : bool
+            When True, score with the frequency-aware (multiset) Jaccard variant.
+        """
+        self.multiset_jaccard = multiset_jaccard
+        self.stop_words = {word.lower() for word in (stop_words or ())}
+        super().__init__(
+            tokenizer=self._tokenizer,
+            scorer=self._scorer,
+            with_scores=with_scores,
+            top_k=top_k,
+        )
+
+    def _tokenizer(self, text: str) -> list[str]:
+        """Lowercase the text, extract runs of word characters, and drop stop words."""
+        return [word for word in re.findall(r"\w+", text.lower()) if word not in self.stop_words]
+
+    def _scorer(self, query_tokens: list[str], chunk_tokens: list[str]) -> float:
+        """
+        Jaccard similarity of the two token lists; 0.0 when either list is empty.
+        - multiset_jaccard=True: sum of per-token minimum counts over sum of maximum counts.
+        - otherwise: size of the set intersection over size of the set union.
+        """
+        if not self.multiset_jaccard:
+            query_set, chunk_set = set(query_tokens), set(chunk_tokens)
+            if query_set and chunk_set:
+                return len(query_set & chunk_set) / len(query_set | chunk_set)
+            return 0.0
+
+        query_counts, chunk_counts = Counter(query_tokens), Counter(chunk_tokens)
+        # Counter "&" keeps per-token minima and "|" keeps per-token maxima.
+        shared = sum((query_counts & chunk_counts).values())
+        total = sum((query_counts | chunk_counts).values())
+        return shared / total if total else 0.0
--- a/cognee/modules/retrieval/lexical_retriever.py
+++ b/cognee/modules/retrieval/lexical_retriever.py
@ -0,0 +1,117 @@
+import asyncio
+from typing import Any, Callable, Optional
+from heapq import nlargest
+
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.shared.logging_utils import get_logger
+
+
+logger = get_logger("LexicalRetriever")
+
+
+class LexicalRetriever(BaseRetriever):
+    """
+    Retriever that ranks DocumentChunk payloads with a pluggable tokenizer and scorer.
+
+    Chunks are loaded from the graph engine on first use and their tokens are kept in
+    memory; each query is then scored against every cached chunk.
+    """
+
+    def __init__(
+        self, tokenizer: Callable, scorer: Callable, top_k: int = 10, with_scores: bool = False
+    ):
+        """
+        Args:
+            tokenizer: Callable turning a text into a list of tokens.
+            scorer: Callable taking (query_tokens, chunk_tokens) and returning a number.
+            top_k: Maximum number of chunks returned per query.
+            with_scores: If True, get_context returns (payload, score) pairs.
+
+        Raises:
+            TypeError: If tokenizer or scorer is not callable.
+            ValueError: If top_k is not a positive integer.
+        """
+        if not callable(tokenizer) or not callable(scorer):
+            raise TypeError("tokenizer and scorer must be callables")
+        if not isinstance(top_k, int) or top_k <= 0:
+            raise ValueError("top_k must be a positive integer")
+
+        self.tokenizer = tokenizer
+        self.scorer = scorer
+        self.top_k = top_k
+        self.with_scores = bool(with_scores)
+
+        # In-memory caches keyed by chunk id
+        self.chunks: dict[str, Any] = {}  # {chunk_id: tokens}
+        self.payloads: dict[str, Any] = {}  # {chunk_id: original_document}
+        self._initialized = False
+        self._init_lock = asyncio.Lock()
+
+    async def initialize(self):
+        """
+        Initialize retriever by reading all DocumentChunks from graph_engine.
+
+        Raises:
+            NoDataError: If the graph engine call fails or no chunk could be loaded.
+        """
+        async with self._init_lock:
+            # Re-check under the lock: a concurrent caller may have finished already.
+            if self._initialized:
+                return
+
+            logger.info("Initializing LexicalRetriever by loading DocumentChunks from graph engine")
+
+            try:
+                graph_engine = await get_graph_engine()
+                nodes, _ = await graph_engine.get_filtered_graph_data(
+                    [{"type": ["DocumentChunk"]}]
+                )
+            except Exception as e:
+                logger.error("Graph engine initialization failed")
+                raise NoDataError("Graph engine initialization failed") from e
+
+            chunk_count = 0
+            for node in nodes:
+                # Unpacking and field access share one guard, so a node whose payload
+                # is not dict-like is skipped instead of aborting the whole load.
+                try:
+                    chunk_id, document = node
+                    node_type, text = document.get("type"), document.get("text")
+                    key = str(document.get("id", chunk_id))
+                except Exception:
+                    logger.warning("Skipping node with unexpected shape: %r", node)
+                    continue
+
+                if node_type != "DocumentChunk" or not text:
+                    continue
+
+                try:
+                    tokens = self.tokenizer(text)
+                except Exception as e:
+                    logger.error("Tokenizer failed for chunk %s: %s", chunk_id, str(e))
+                    continue
+
+                if not tokens:
+                    continue
+                self.chunks[key] = tokens
+                self.payloads[key] = document
+                chunk_count += 1
+
+            if chunk_count == 0:
+                logger.error("Initialization completed but no valid chunks were loaded.")
+                raise NoDataError("No valid chunks loaded during initialization.")
+
+            self._initialized = True
+            logger.info("Initialized with %d document chunks", len(self.chunks))
+
+    async def get_context(self, query: str) -> Any:
+        """
+        Retrieves relevant chunks for the given query.
+
+        Every cached chunk is scored against the query and the top_k best are returned.
+
+        Returns:
+            A list of chunk payloads, or of (payload, score) pairs when with_scores is
+            set. The list is empty when the query cannot be tokenized or has no tokens.
+
+        Raises:
+            NoDataError: Propagated from initialize() on first use.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if not self.chunks:
+            logger.warning("No chunks available in retriever")
+            return []
+
+        try:
+            query_tokens = self.tokenizer(query)
+        except Exception as e:
+            logger.error("Failed to tokenize query: %s", str(e))
+            return []
+
+        if not query_tokens:
+            logger.warning("Query produced no tokens")
+            return []
+
+        results = []
+        for chunk_id, chunk_tokens in self.chunks.items():
+            # A failing or non-numeric scorer result counts as 0.0 for that chunk.
+            try:
+                score = self.scorer(query_tokens, chunk_tokens)
+                if not isinstance(score, (int, float)):
+                    logger.warning("Non-numeric score for chunk %s → treated as 0.0", chunk_id)
+                    score = 0.0
+            except Exception as e:
+                logger.error("Scorer failed for chunk %s: %s", chunk_id, str(e))
+                score = 0.0
+            results.append((chunk_id, score))
+
+        top_results = nlargest(self.top_k, results, key=lambda x: x[1])
+        logger.info(
+            "Retrieved %d/%d chunks for query (len=%d)",
+            len(top_results),
+            len(results),
+            len(query_tokens),
+        )
+
+        if self.with_scores:
+            return [(self.payloads[chunk_id], score) for chunk_id, score in top_results]
+        return [self.payloads[chunk_id] for chunk_id, _ in top_results]
+
+    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+        """Returns context for the given query (retrieves if not provided)."""
+        if context is None:
+            context = await self.get_context(query)
+        return context
--- a/cognee/modules/search/methods/get_search_type_tools.py
+++ b/cognee/modules/search/methods/get_search_type_tools.py
@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
 from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
+from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
 from cognee.modules.retrieval.graph_summary_completion_retriever import (
    GraphSummaryCompletionRetriever,
 )
@ -152,6 +153,10 @@ async def get_search_type_tools(
            TemporalRetriever(top_k=top_k).get_completion,
            TemporalRetriever(top_k=top_k).get_context,
        ],
+        SearchType.CHUNKS_LEXICAL: (lambda _r=JaccardChunksRetriever(top_k=top_k): [
+          _r.get_completion,
+          _r.get_context,
+        ])(),
        SearchType.CODING_RULES: [
            CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
        ],
--- a/cognee/modules/search/types/SearchType.py
+++ b/cognee/modules/search/types/SearchType.py
@ -17,3 +17,4 @@ class SearchType(Enum):
    FEEDBACK = "FEEDBACK"
    TEMPORAL = "TEMPORAL"
    CODING_RULES = "CODING_RULES"
+    CHUNKS_LEXICAL = "CHUNKS_LEXICAL"
--- a/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py
+++ b/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py
@ -9,6 +9,18 @@ from uuid import UUID
 async def authorized_give_permission_on_datasets(
    principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
 ):
+    """
+        Give permission to certain datasets to a user.
+        The request owner must have the necessary permission to share the datasets.
+    Args:
+        principal_id: Id of user to whom datasets are shared
+        dataset_ids: Ids of datasets to share
+        permission_name: Name of permission to give
+        owner_id: Id of the request owner
+
+    Returns:
+        None
+    """
    # If only a single dataset UUID is provided transform it to a list
    if not isinstance(dataset_ids, list):
        dataset_ids = [dataset_ids]
--- a/cognee/modules/users/permissions/methods/check_permission_on_dataset.py
+++ b/cognee/modules/users/permissions/methods/check_permission_on_dataset.py
@ -10,6 +10,17 @@ logger = get_logger()


 async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
+    """
+        Check if a user has a specific permission on a dataset.
+    Args:
+        user: User whose permission is checked
+        permission_type: Type of permission to check
+        dataset_id: Id of the dataset
+
+    Returns:
+        None
+
+    """
    if user is None:
        user = await get_default_user()

--- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
@ -11,6 +11,16 @@ logger = get_logger()


 async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
+    """
+        Return a list of datasets the user has permission for.
+        If the user is part of a tenant, return datasets his roles have permission for.
+    Args:
+        user: User whose accessible datasets are returned
+        permission_type: Type of permission the datasets must grant
+
+    Returns:
+        list[Dataset]: List of datasets user has permission for
+    """
    datasets = list()
    # Get all datasets User has explicit access to
    datasets.extend(await get_principal_datasets(user, permission_type))
--- a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py
+++ b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py
@ -8,6 +8,16 @@ from ...models import ACL, Permission


 async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
+    """
+        Return a list of document ids for which the user has read permission.
+        If datasets are specified, return only documents from those datasets.
+    Args:
+        user_id: Id of the user
+        datasets: List of datasets
+
+    Returns:
+        list[str]: List of documents for which the user has read permission
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/get_principal.py
+++ b/cognee/modules/users/permissions/methods/get_principal.py
@ -6,6 +6,15 @@ from ...models.Principal import Principal


 async def get_principal(principal_id: UUID):
+    """
+        Return information about a user based on their id
+    Args:
+        principal_id: Id of the user
+
+    Returns:
+        principal: Information about the user (principal)
+
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/get_principal_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_principal_datasets.py
@ -9,6 +9,17 @@ from ...models.ACL import ACL


 async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
+    """
+        Return a list of datasets for which the user (principal) has a certain permission.
+    Args:
+        principal: Information about the user
+        permission_type: Type of permission
+
+    Returns:
+        list[Dataset]: List of datasets for which the user (principal)
+        has the permission (permission_type).
+
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/get_role.py
+++ b/cognee/modules/users/permissions/methods/get_role.py
@ -9,6 +9,16 @@ from ...models.Role import Role


 async def get_role(tenant_id: UUID, role_name: str):
+    """
+        Return the role with the name role_name of the given tenant.
+    Args:
+        tenant_id: Id of the given tenant
+        role_name: Name of the role
+
+    Returns:
+        The role for the given tenant.
+
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
        Return a list of datasets user has given permission for. If a list of datasets is provided,
        verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
    Args:
-        user_id:
-        permission_type:
-        dataset_ids:
+        user_id: Id of the user.
+        permission_type: Type of the permission.
+        dataset_ids: Ids of the provided datasets

    Returns:
        list[Dataset]: List of datasets user has permission for
--- a/cognee/modules/users/permissions/methods/get_tenant.py
+++ b/cognee/modules/users/permissions/methods/get_tenant.py
@ -8,6 +8,15 @@ from ...models.Tenant import Tenant


 async def get_tenant(tenant_id: UUID):
+    """
+        Return information about the tenant based on the given id.
+    Args:
+        tenant_id: Id of the given tenant
+
+    Returns:
+        Information about the given tenant.
+
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_role.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_role.py
@ -16,6 +16,15 @@ from cognee.modules.users.models import (


 async def give_default_permission_to_role(role_id: UUID, permission_name: str):
+    """
+        Give the permission with given name to the role with the given id as a default permission.
+    Args:
+        role_id: Id of the role
+        permission_name: Name of the permission
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py
@ -16,6 +16,15 @@ from cognee.modules.users.models import (


 async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
+    """
+        Give the permission with given name to the tenant with the given id as a default permission.
+    Args:
+        tenant_id: Id of the tenant
+        permission_name: Name of the permission
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        tenant = (
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_user.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_user.py
@ -16,6 +16,15 @@ from cognee.modules.users.models import (


 async def give_default_permission_to_user(user_id: UUID, permission_name: str):
+    """
+        Give the permission with given name to the user with the given id as a default permission.
+    Args:
+        user_id: Id of the user
+        permission_name: Name of the permission
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
--- a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py
+++ b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py
@ -24,6 +24,16 @@ async def give_permission_on_dataset(
    dataset_id: UUID,
    permission_name: str,
 ):
+    """
+        Give a specific permission on a dataset to a user.
+    Args:
+        principal: User who is being given the permission on the dataset
+        dataset_id: Id of the dataset
+        permission_name: Name of permission to give
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
--- a/cognee/modules/users/roles/methods/add_user_to_role.py
+++ b/cognee/modules/users/roles/methods/add_user_to_role.py
@ -21,6 +21,17 @@ from cognee.modules.users.models import (


 async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
+    """
+        Add a user with the given id to the role with the given id.
+    Args:
+        user_id: Id of the user.
+        role_id: Id of the role.
+        owner_id: Id of the request owner.
+
+    Returns:
+        None
+
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
--- a/cognee/modules/users/roles/methods/create_role.py
+++ b/cognee/modules/users/roles/methods/create_role.py
@ -16,6 +16,16 @@ async def create_role(
    role_name: str,
    owner_id: UUID,
 ):
+    """
+        Create a new role with the given name, if the request owner with the given id
+        has the necessary permission.
+    Args:
+        role_name: Name of the new role.
+        owner_id: Id of the request owner.
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        user = await get_user(owner_id)
--- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py
+++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py
@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (


 async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
+    """
+        Add a user with the given id to the tenant with the given id.
+        This can only be successful if the request owner with the given id is the tenant owner.
+    Args:
+        user_id: Id of the user.
+        tenant_id: Id of the tenant.
+        owner_id: Id of the request owner.
+
+    Returns:
+        None
+
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        user = await get_user(user_id)
--- a/cognee/modules/users/tenants/methods/create_tenant.py
+++ b/cognee/modules/users/tenants/methods/create_tenant.py
@ -8,6 +8,16 @@ from cognee.modules.users.methods import get_user


 async def create_tenant(tenant_name: str, user_id: UUID):
+    """
+        Create a new tenant with the given name, for the user with the given id.
+        This user is the owner of the tenant.
+    Args:
+        tenant_name: Name of the new tenant.
+        user_id: Id of the user.
+
+    Returns:
+        None
+    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        try:
--- a/cognee/tasks/graph/extract_graph_from_data.py
+++ b/cognee/tasks/graph/extract_graph_from_data.py
@ -3,8 +3,14 @@ from typing import Type, List, Optional
 from pydantic import BaseModel

 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.tasks.storage.add_data_points import add_data_points
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.modules.graph.utils import (
    expand_with_nodes_and_edges,
@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
    data_chunks: list[DocumentChunk],
    chunk_graphs: list,
    graph_model: Type[BaseModel],
-    ontology_adapter: OntologyResolver,
+    ontology_resolver: BaseOntologyResolver,
 ) -> List[DocumentChunk]:
-    """Updates DocumentChunk objects, integrates data points and edges into databases."""
+    """Integrate chunk graphs with ontology validation and store in databases.
+
+    This function processes document chunks and their associated knowledge graphs,
+    validates entities against an ontology resolver, and stores the integrated
+    data points and edges in the configured databases.
+
+    Args:
+        data_chunks: List of document chunks containing source data
+        chunk_graphs: List of knowledge graphs corresponding to each chunk
+        graph_model: Pydantic model class for graph data validation
+        ontology_resolver: Resolver for validating entities against ontology
+
+    Returns:
+        List of updated DocumentChunk objects with integrated data
+
+    Raises:
+        InvalidChunkGraphInputError: If input validation fails
+        InvalidGraphModelError: If graph model validation fails
+        InvalidOntologyAdapterError: If ontology resolver validation fails
+    """

    if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
        raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
        )
    if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
        raise InvalidGraphModelError(graph_model)
-    if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
+    if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
        raise InvalidOntologyAdapterError(
-            type(ontology_adapter).__name__ if ontology_adapter else "None"
+            type(ontology_resolver).__name__ if ontology_resolver else "None"
        )

    graph_engine = await get_graph_engine()
@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
    )

    graph_nodes, graph_edges = expand_with_nodes_and_edges(
-        data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
+        data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
    )

    if len(graph_nodes) > 0:
@ -70,7 +95,7 @@ async def integrate_chunk_graphs(
 async def extract_graph_from_data(
    data_chunks: List[DocumentChunk],
    graph_model: Type[BaseModel],
-    ontology_adapter: OntologyResolver = None,
+    config: Config = None,
    custom_prompt: Optional[str] = None,
 ) -> List[DocumentChunk]:
    """
@ -101,6 +126,24 @@ async def extract_graph_from_data(
                if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
            ]

-    return await integrate_chunk_graphs(
-        data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
-    )
+    # Extract resolver from config if provided, otherwise get default
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
+    ontology_resolver = config["ontology_config"]["ontology_resolver"]
+
+    return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)
--- a/cognee/tasks/graph/extract_graph_from_data_v2.py
+++ b/cognee/tasks/graph/extract_graph_from_data_v2.py
@ -3,7 +3,7 @@ from typing import List

 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
 from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
 from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
    extract_content_nodes_and_relationship_names,
@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
 async def extract_graph_from_data(
    data_chunks: List[DocumentChunk],
    n_rounds: int = 2,
-    ontology_adapter: OntologyResolver = None,
+    ontology_adapter: BaseOntologyResolver = None,
 ) -> List[DocumentChunk]:
-    """Extract and update graph data from document chunks in multiple steps."""
+    """Extract and update graph data from document chunks using cascade extraction.
+
+    This function performs multi-step graph extraction from document chunks,
+    using cascade extraction techniques to build comprehensive knowledge graphs.
+
+    Args:
+        data_chunks: List of document chunks to process
+        n_rounds: Number of extraction rounds to perform (default: 2)
+        ontology_adapter: Resolver for validating entities against ontology
+
+    Returns:
+        List of updated DocumentChunk objects with extracted graph data
+    """
    chunk_nodes = await asyncio.gather(
        *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
    )
@ -44,5 +56,5 @@ async def extract_graph_from_data(
        data_chunks=data_chunks,
        chunk_graphs=chunk_graphs,
        graph_model=KnowledgeGraph,
-        ontology_adapter=ontology_adapter or OntologyResolver(),
+        ontology_resolver=ontology_adapter,
    )
--- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py
+++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py
@ -1,12 +1,14 @@
 import pytest
 from rdflib import Graph, Namespace, RDF, OWL, RDFS
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver


 def test_ontology_adapter_initialization_success():
-    """Test successful initialization of OntologyAdapter."""
+    """Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver."""

-    adapter = OntologyResolver()
+    adapter = get_default_ontology_resolver()
    adapter.build_lookup()

    assert isinstance(adapter.lookup, dict)
@ -14,7 +16,7 @@ def test_ontology_adapter_initialization_success():

 def test_ontology_adapter_initialization_file_not_found():
    """Test OntologyAdapter initialization with nonexistent file."""
-    adapter = OntologyResolver(ontology_file="nonexistent.owl")
+    adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
    assert adapter.graph is None


@ -27,7 +29,7 @@ def test_build_lookup():

    g.add((ns.Audi, RDF.type, ns.Car))

-    resolver = OntologyResolver()
+    resolver = RDFLibOntologyResolver()
    resolver.graph = g
    resolver.build_lookup()

@ -50,7 +52,7 @@ def test_find_closest_match_exact():
    g.add((ns.Car, RDF.type, OWL.Class))
    g.add((ns.Audi, RDF.type, ns.Car))

-    resolver = OntologyResolver()
+    resolver = RDFLibOntologyResolver()
    resolver.graph = g
    resolver.build_lookup()

@ -71,7 +73,7 @@ def test_find_closest_match_fuzzy():
    g.add((ns.Audi, RDF.type, ns.Car))
    g.add((ns.BMW, RDF.type, ns.Car))

-    resolver = OntologyResolver()
+    resolver = RDFLibOntologyResolver()
    resolver.graph = g
    resolver.build_lookup()

@ -92,7 +94,7 @@ def test_find_closest_match_no_match():
    g.add((ns.Audi, RDF.type, ns.Car))
    g.add((ns.BMW, RDF.type, ns.Car))

-    resolver = OntologyResolver()
+    resolver = RDFLibOntologyResolver()
    resolver.graph = g
    resolver.build_lookup()

@ -102,10 +104,10 @@ def test_find_closest_match_no_match():


 def test_get_subgraph_no_match_rdflib():
-    """Test get_subgraph returns empty results for a non-existent node."""
+    """Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver."""
    g = Graph()

-    resolver = OntologyResolver()
+    resolver = get_default_ontology_resolver()
    resolver.graph = g
    resolver.build_lookup()

@ -138,7 +140,7 @@ def test_get_subgraph_success_rdflib():
    g.add((ns.VW, owns, ns.Audi))
    g.add((ns.VW, owns, ns.Porsche))

-    resolver = OntologyResolver()
+    resolver = RDFLibOntologyResolver()
    resolver.graph = g
    resolver.build_lookup()

@ -160,10 +162,10 @@ def test_get_subgraph_success_rdflib():


 def test_refresh_lookup_rdflib():
-    """Test that refresh_lookup rebuilds the lookup dict into a new object."""
+    """Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver."""
    g = Graph()

-    resolver = OntologyResolver()
+    resolver = get_default_ontology_resolver()
    resolver.graph = g
    resolver.build_lookup()

@ -172,3 +174,318 @@ def test_refresh_lookup_rdflib():
    resolver.refresh_lookup()

    assert resolver.lookup is not original_lookup
+
+
+def test_fuzzy_matching_strategy_exact_match():
+    """Test FuzzyMatchingStrategy finds exact matches."""
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    strategy = FuzzyMatchingStrategy()
+    candidates = ["audi", "bmw", "mercedes"]
+
+    result = strategy.find_match("audi", candidates)
+    assert result == "audi"
+
+
+def test_fuzzy_matching_strategy_fuzzy_match():
+    """Test FuzzyMatchingStrategy finds fuzzy matches."""
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    strategy = FuzzyMatchingStrategy(cutoff=0.6)
+    candidates = ["audi", "bmw", "mercedes"]
+
+    result = strategy.find_match("audii", candidates)
+    assert result == "audi"
+
+
+def test_fuzzy_matching_strategy_no_match():
+    """Test FuzzyMatchingStrategy returns None when no match meets cutoff."""
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    strategy = FuzzyMatchingStrategy(cutoff=0.9)
+    candidates = ["audi", "bmw", "mercedes"]
+
+    result = strategy.find_match("completely_different", candidates)
+    assert result is None
+
+
+def test_fuzzy_matching_strategy_empty_candidates():
+    """Test FuzzyMatchingStrategy handles empty candidates list."""
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    strategy = FuzzyMatchingStrategy()
+
+    result = strategy.find_match("audi", [])
+    assert result is None
+
+
+def test_base_ontology_resolver_initialization():
+    """Test BaseOntologyResolver initialization with default matching strategy."""
+    from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    class TestOntologyResolver(BaseOntologyResolver):
+        def build_lookup(self):
+            pass
+
+        def refresh_lookup(self):
+            pass
+
+        def find_closest_match(self, name, category):
+            return None
+
+        def get_subgraph(self, node_name, node_type="individuals", directed=True):
+            return [], [], None
+
+    resolver = TestOntologyResolver()
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_base_ontology_resolver_custom_matching_strategy():
+    """Test BaseOntologyResolver initialization with custom matching strategy."""
+    from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+    from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+    class CustomMatchingStrategy(MatchingStrategy):
+        def find_match(self, name, candidates):
+            return "custom_match"
+
+    class TestOntologyResolver(BaseOntologyResolver):
+        def build_lookup(self):
+            pass
+
+        def refresh_lookup(self):
+            pass
+
+        def find_closest_match(self, name, category):
+            return None
+
+        def get_subgraph(self, node_name, node_type="individuals", directed=True):
+            return [], [], None
+
+    custom_strategy = CustomMatchingStrategy()
+    resolver = TestOntologyResolver(matching_strategy=custom_strategy)
+    assert resolver.matching_strategy == custom_strategy
+
+
+def test_ontology_config_structure():
+    """Test TypedDict structure for ontology configuration."""
+    from cognee.modules.ontology.ontology_config import Config
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    matching_strategy = FuzzyMatchingStrategy()
+    resolver = RDFLibOntologyResolver(matching_strategy=matching_strategy)
+
+    config: Config = {"ontology_config": {"ontology_resolver": resolver}}
+
+    assert config["ontology_config"]["ontology_resolver"] == resolver
+
+
+def test_get_ontology_resolver_default():
+    """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy."""
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    resolver = get_default_ontology_resolver()
+
+    assert isinstance(resolver, RDFLibOntologyResolver)
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_get_default_ontology_resolver():
+    """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy."""
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    resolver = get_default_ontology_resolver()
+
+    assert isinstance(resolver, RDFLibOntologyResolver)
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_rdflib_ontology_resolver_uses_matching_strategy():
+    """Test that RDFLibOntologyResolver uses the provided matching strategy."""
+    from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+    class TestMatchingStrategy(MatchingStrategy):
+        def find_match(self, name, candidates):
+            return "test_match" if candidates else None
+
+    ns = Namespace("http://example.org/test#")
+    g = Graph()
+    g.add((ns.Car, RDF.type, OWL.Class))
+    g.add((ns.Audi, RDF.type, ns.Car))
+
+    resolver = RDFLibOntologyResolver(matching_strategy=TestMatchingStrategy())
+    resolver.graph = g
+    resolver.build_lookup()
+
+    result = resolver.find_closest_match("Audi", "individuals")
+    assert result == "test_match"
+
+
+def test_rdflib_ontology_resolver_default_matching_strategy():
+    """Test that RDFLibOntologyResolver uses FuzzyMatchingStrategy by default."""
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    resolver = RDFLibOntologyResolver()
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_get_ontology_resolver_from_env_success():
+    """Test get_ontology_resolver_from_env returns correct resolver with valid parameters."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    resolver = get_ontology_resolver_from_env(
+        ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+    )
+
+    assert isinstance(resolver, RDFLibOntologyResolver)
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+    assert resolver.ontology_file == "/test/path.owl"
+
+
+def test_get_ontology_resolver_from_env_unsupported_resolver():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported resolver."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="unsupported",
+            matching_strategy="fuzzy",
+            ontology_file_path="/test/path.owl",
+        )
+
+    assert "Unsupported ontology resolver: unsupported" in str(exc_info.value)
+    assert "Supported resolvers are: RdfLib with FuzzyMatchingStrategy" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_unsupported_strategy():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported strategy."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="rdflib",
+            matching_strategy="unsupported",
+            ontology_file_path="/test/path.owl",
+        )
+
+    assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_file_path():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for empty file path."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=""
+        )
+
+    assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_none_file_path():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for None file path."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=None
+        )
+
+    assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_resolver():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for empty resolver."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+        )
+
+    assert "Unsupported ontology resolver:" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_strategy():
+    """Test get_ontology_resolver_from_env raises EnvironmentError for empty strategy."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env(
+            ontology_resolver="rdflib", matching_strategy="", ontology_file_path="/test/path.owl"
+        )
+
+    assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_default_parameters():
+    """Test get_ontology_resolver_from_env with default empty parameters raises EnvironmentError."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError) as exc_info:
+        get_ontology_resolver_from_env()
+
+    assert "Unsupported ontology resolver:" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_case_sensitivity():
+    """Test get_ontology_resolver_from_env is case sensitive."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    with pytest.raises(EnvironmentError):
+        get_ontology_resolver_from_env(
+            ontology_resolver="RDFLIB",
+            matching_strategy="fuzzy",
+            ontology_file_path="/test/path.owl",
+        )
+
+    with pytest.raises(EnvironmentError):
+        get_ontology_resolver_from_env(
+            ontology_resolver="RdfLib",
+            matching_strategy="fuzzy",
+            ontology_file_path="/test/path.owl",
+        )
+
+
+def test_get_ontology_resolver_from_env_with_actual_file():
+    """Test get_ontology_resolver_from_env keeps the supplied ontology file path on the resolver."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+    resolver = get_ontology_resolver_from_env(
+        ontology_resolver="rdflib",
+        matching_strategy="fuzzy",
+        ontology_file_path="/path/to/ontology.owl",
+    )
+
+    assert isinstance(resolver, RDFLibOntologyResolver)
+    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+    assert resolver.ontology_file == "/path/to/ontology.owl"
+
+
+def test_get_ontology_resolver_from_env_resolver_functionality():
+    """Test that resolver created from env function works correctly."""
+    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+    resolver = get_ontology_resolver_from_env(
+        ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+    )
+
+    resolver.build_lookup()
+    assert isinstance(resolver.lookup, dict)
+
+    result = resolver.find_closest_match("test", "individuals")
+    assert result is None  # Should return None for non-existent entity
+
+    nodes, relationships, start_node = resolver.get_subgraph("test", "individuals")
+    assert nodes == []
+    assert relationships == []
+    assert start_node is None
--- a/examples/python/ontology_demo_example.py
+++ b/examples/python/ontology_demo_example.py
@ -5,6 +5,8 @@ import cognee
 from cognee.api.v1.search import SearchType
 from cognee.api.v1.visualize.visualize import visualize_graph
 from cognee.shared.logging_utils import setup_logging
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.ontology_config import Config

 text_1 = """
 1. Audi
@ -60,7 +62,14 @@ async def main():
        os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
    )

-    await cognee.cognify(ontology_file_path=ontology_path)
+    # Create full config structure manually
+    config: Config = {
+        "ontology_config": {
+            "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
+        }
+    }
+
+    await cognee.cognify(config=config)
    print("Knowledge with ontology created.")

    # Step 4: Query insights
--- a/examples/python/ontology_demo_example_2.py
+++ b/examples/python/ontology_demo_example_2.py
@ -5,6 +5,8 @@ import os
 import textwrap
 from cognee.api.v1.search import SearchType
 from cognee.api.v1.visualize.visualize import visualize_graph
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.ontology_config import Config


 async def run_pipeline(ontology_path=None):
@ -17,7 +19,13 @@ async def run_pipeline(ontology_path=None):

    await cognee.add(scientific_papers_dir)

-    pipeline_run = await cognee.cognify(ontology_file_path=ontology_path)
+    config: Config = {
+        "ontology_config": {
+            "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
+        }
+    }
+
+    pipeline_run = await cognee.cognify(config=config)

    return pipeline_run

--- a/notebooks/ontology_demo.ipynb
+++ b/notebooks/ontology_demo.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,7 +1,7 @@
 [project]
 name = "cognee"

-version = "0.3.4.dev4"
+version = "0.3.4"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
    { name = "Vasilije Markovic" },
@ -64,14 +64,13 @@ dependencies = [
    "pylance>=0.22.0,<1.0.0",
    "kuzu (==0.11.0)",
    "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
-]
-
-[project.optional-dependencies]
-api = [
    "uvicorn>=0.34.0,<1.0.0",
    "gunicorn>=20.1.0,<24",
    "websockets>=15.0.1,<16.0.0"
 ]
+
+[project.optional-dependencies]
+api=[]
 distributed = [
    "modal>=1.0.5,<2.0.0",
 ]
--- a/uv.lock
+++ b/uv.lock
@ -852,7 +852,7 @@ wheels = [

 [[package]]
 name = "cognee"
-version = "0.3.4.dev4"
+version = "0.3.4"
 source = { editable = "." }
 dependencies = [
    { name = "aiofiles" },
@ -864,6 +864,7 @@ dependencies = [
    { name = "fastapi" },
    { name = "fastapi-users", extra = ["sqlalchemy"] },
    { name = "filetype" },
+    { name = "gunicorn" },
    { name = "instructor" },
    { name = "jinja2" },
    { name = "kuzu" },
@ -899,17 +900,14 @@ dependencies = [
    { name = "tiktoken" },
    { name = "typing-extensions" },
    { name = "unstructured", extra = ["pdf"] },
+    { name = "uvicorn" },
+    { name = "websockets" },
 ]

 [package.optional-dependencies]
 anthropic = [
    { name = "anthropic" },
 ]
-api = [
-    { name = "gunicorn" },
-    { name = "uvicorn" },
-    { name = "websockets" },
-]
 aws = [
    { name = "s3fs", extra = ["boto3"] },
 ]
@ -1036,7 +1034,7 @@ requires-dist = [
    { name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" },
    { name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" },
    { name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" },
-    { name = "gunicorn", marker = "extra == 'api'", specifier = ">=20.1.0,<24" },
+    { name = "gunicorn", specifier = ">=20.1.0,<24" },
    { name = "instructor", specifier = ">=1.9.1,<2.0.0" },
    { name = "jinja2", specifier = ">=3.1.3,<4" },
    { name = "kuzu", specifier = "==0.11.0" },
@ -1105,8 +1103,8 @@ requires-dist = [
    { name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
    { name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
    { name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" },
-    { name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
-    { name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
+    { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
+    { name = "websockets", specifier = ">=15.0.1,<16.0.0" },
 ]
 provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]

@ -4706,7 +4704,7 @@ name = "nvidia-cudnn-cu12"
 version = "9.10.2.21"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
@ -4717,7 +4715,7 @@ name = "nvidia-cufft-cu12"
 version = "11.3.3.83"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
@ -4744,9 +4742,9 @@ name = "nvidia-cusolver-cu12"
 version = "11.7.3.90"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
-    { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
+    { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
@ -4757,7 +4755,7 @@ name = "nvidia-cusparse-cu12"
 version = "12.5.8.93"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
@ -8204,7 +8202,7 @@ name = "triton"
 version = "3.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "setuptools", marker = "sys_platform != 'emscripten'" },
+    { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },