From f651991c86d6fdc9dce6362100c3719dc9c2f5f6 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:02:38 +0200 Subject: [PATCH 01/22] feat: adds base class + renames rdflib implementation --- cognee/api/v1/cognify/cognify.py | 4 +-- .../get_default_tasks_by_indices.py | 4 +-- .../utils/expand_with_nodes_and_edges.py | 16 ++++----- .../ontology/base_ontology_resolver.py | 30 ++++++++++++++++ cognee/modules/ontology/models.py | 20 +++++++++++ .../ontology/rdf_xml/OntologyResolver.py | 35 +++++++------------ cognee/tasks/graph/extract_graph_from_data.py | 6 ++-- .../tasks/graph/extract_graph_from_data_v2.py | 4 +-- .../modules/ontology/test_ontology_adapter.py | 21 +++++------ 9 files changed, 90 insertions(+), 50 deletions(-) create mode 100644 cognee/modules/ontology/base_ontology_resolver.py create mode 100644 cognee/modules/ontology/models.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e4f91b44c..a35658691 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_adapter=OntologyResolver(ontology_file=ontology_file_path), + ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path), custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the 
document chunks. diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index be532232f..677090a58 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver async def get_default_tasks_by_indices( @@ -33,7 +33,7 @@ async def get_no_summary_tasks( # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) - ontology_adapter = OntologyResolver(ontology_file=ontology_file_path) + ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path) graph_task = Task( extract_graph_from_data, diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 125f59e72..3bd62e6e0 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,7 +8,7 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver def _create_node_key(node_id: str, category: str) -> str: @@ -83,7 +83,7 @@ def _process_ontology_edges( def _create_type_node( node_type: str, - ontology_resolver: 
OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -141,7 +141,7 @@ def _create_entity_node( node_name: str, node_description: str, type_node: EntityType, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -198,7 +198,7 @@ def _create_entity_node( def _process_graph_nodes( data_chunk: DocumentChunk, graph: KnowledgeGraph, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -277,7 +277,7 @@ def _process_graph_edges( def expand_with_nodes_and_edges( data_chunks: list[DocumentChunk], chunk_graphs: list[KnowledgeGraph], - ontology_resolver: OntologyResolver = None, + ontology_resolver: RDFLibOntologyResolver = None, existing_edges_map: Optional[dict[str, bool]] = None, ): """ @@ -296,8 +296,8 @@ def expand_with_nodes_and_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) extracted from the chunk content. - ontology_resolver (OntologyResolver, optional): Resolver for validating entities and - types against an ontology. If None, a default OntologyResolver is created. + ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and + types against an ontology. If None, a default RDFLibOntologyResolver is created. Defaults to None. existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}". 
@@ -320,7 +320,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = OntologyResolver() + ontology_resolver = RDFLibOntologyResolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py new file mode 100644 index 000000000..55826bfb0 --- /dev/null +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod +from typing import List, Tuple, Optional + +from cognee.modules.ontology.models import AttachedOntologyNode + + +class BaseOntologyResolver(ABC): + """Abstract base class for ontology resolvers.""" + + @abstractmethod + def build_lookup(self) -> None: + """Build the lookup dictionary for ontology entities.""" + pass + + @abstractmethod + def refresh_lookup(self) -> None: + """Refresh the lookup dictionary.""" + pass + + @abstractmethod + def find_closest_match(self, name: str, category: str) -> Optional[str]: + """Find the closest match for a given name in the specified category.""" + pass + + @abstractmethod + def get_subgraph( + self, node_name: str, node_type: str = "individuals", directed: bool = True + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + """Get a subgraph for the given node.""" + pass diff --git a/cognee/modules/ontology/models.py b/cognee/modules/ontology/models.py new file mode 100644 index 000000000..eefa9e5dd --- /dev/null +++ b/cognee/modules/ontology/models.py @@ -0,0 +1,20 @@ +from typing import Any + + +class AttachedOntologyNode: + """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" + + def __init__(self, uri: Any, category: str): + self.uri = uri + self.name = self._extract_name(uri) + self.category = category + + @staticmethod + def _extract_name(uri: Any) -> str: + uri_str = str(uri) + if "#" in uri_str: + 
return uri_str.split("#")[-1] + return uri_str.rstrip("/").split("/")[-1] + + def __repr__(self): + return f"AttachedOntologyNode(name={self.name}, category={self.category})" diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/OntologyResolver.py index 7f3fa004d..3c1a55b5a 100644 --- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/OntologyResolver.py @@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import ( FindClosestMatchError, GetSubgraphError, ) +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode logger = get_logger("OntologyAdapter") -class AttachedOntologyNode: - """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" - - def __init__(self, uri: URIRef, category: str): - self.uri = uri - self.name = self._extract_name(uri) - self.category = category - - @staticmethod - def _extract_name(uri: URIRef) -> str: - uri_str = str(uri) - if "#" in uri_str: - return uri_str.split("#")[-1] - return uri_str.rstrip("/").split("/")[-1] - - def __repr__(self): - return f"AttachedOntologyNode(name={self.name}, category={self.category})" - - -class OntologyResolver: +class RDFLibOntologyResolver(BaseOntologyResolver): + """RDFLib-based ontology resolver implementation. + + This implementation uses RDFLib to parse and work with RDF/OWL ontology files. + It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
+ """ + def __init__(self, ontology_file: Optional[str] = None): self.ontology_file = ontology_file try: @@ -60,7 +49,7 @@ class OntologyResolver: name = uri_str.rstrip("/").split("/")[-1] return name.lower().replace(" ", "_").strip() - def build_lookup(self): + def build_lookup(self) -> None: try: classes: Dict[str, URIRef] = {} individuals: Dict[str, URIRef] = {} @@ -97,7 +86,7 @@ class OntologyResolver: logger.error("Failed to build lookup dictionary: %s", str(e)) raise RuntimeError("Lookup build failed") from e - def refresh_lookup(self): + def refresh_lookup(self) -> None: self.build_lookup() logger.info("Ontology lookup refreshed.") @@ -125,7 +114,7 @@ class OntologyResolver: def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]: + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: nodes_set = set() edges: List[Tuple[str, str, str]] = [] visited = set() diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index d81516206..2ad32f308 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, @@ -24,7 +24,7 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver, + ontology_adapter: RDFLibOntologyResolver, ) -> 
List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" @@ -70,7 +70,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index c1f43df5c..ce69f9b0e 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, @@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs async def extract_graph_from_data( data_chunks: List[DocumentChunk], n_rounds: int = 2, - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, ) -> List[DocumentChunk]: """Extract and update graph data from document chunks in multiple steps.""" chunk_nodes = await asyncio.gather( diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 8b406e53a..e0a6f1402 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -1,12 +1,13 @@ import pytest from rdflib 
import Graph, Namespace, RDF, OWL, RDFS -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = OntologyResolver() + adapter = RDFLibOntologyResolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success(): def test_ontology_adapter_initialization_file_not_found(): """Test OntologyAdapter initialization with nonexistent file.""" - adapter = OntologyResolver(ontology_file="nonexistent.owl") + adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl") assert adapter.graph is None @@ -27,7 +28,7 @@ def test_build_lookup(): g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -50,7 +51,7 @@ def test_find_closest_match_exact(): g.add((ns.Car, RDF.type, OWL.Class)) g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -92,7 +93,7 @@ def test_find_closest_match_no_match(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = OntologyResolver() + resolver = 
RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib(): g.add((ns.VW, owns, ns.Audi)) g.add((ns.VW, owns, ns.Porsche)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() From 93a383b56a4e774a863a84847b4eb62ce61789cf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:23:30 +0200 Subject: [PATCH 02/22] feat: adds matching strategies and moves resolver --- cognee/api/v1/cognify/cognify.py | 2 +- .../get_default_tasks_by_indices.py | 2 +- .../utils/expand_with_nodes_and_edges.py | 2 +- .../ontology/base_ontology_resolver.py | 10 ++++ .../modules/ontology/matching_strategies.py | 55 +++++++++++++++++++ ...yResolver.py => RDFLibOntologyResolver.py} | 13 ++--- cognee/tasks/graph/extract_graph_from_data.py | 2 +- .../tasks/graph/extract_graph_from_data_v2.py | 2 +- .../modules/ontology/test_ontology_adapter.py | 2 +- 9 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 cognee/modules/ontology/matching_strategies.py rename cognee/modules/ontology/rdf_xml/{OntologyResolver.py => RDFLibOntologyResolver.py} (95%) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index a35658691..e933bafd8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver 
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.users.models import User from cognee.tasks.documents import ( diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index 677090a58..fb10c7eed 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver async def get_default_tasks_by_indices( diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 3bd62e6e0..bc6205d41 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,7 +8,7 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver def _create_node_key(node_id: str, category: str) -> str: diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py index 55826bfb0..86f51fcb7 100644 --- a/cognee/modules/ontology/base_ontology_resolver.py +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -2,10 +2,20 @@ from abc import ABC, abstractmethod from typing 
import List, Tuple, Optional from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy class BaseOntologyResolver(ABC): """Abstract base class for ontology resolvers.""" + + def __init__(self, matching_strategy: Optional[MatchingStrategy] = None): + """Initialize the ontology resolver with a matching strategy. + + Args: + matching_strategy: The strategy to use for entity matching. + Defaults to FuzzyMatchingStrategy if None. + """ + self.matching_strategy = matching_strategy or FuzzyMatchingStrategy() @abstractmethod def build_lookup(self) -> None: diff --git a/cognee/modules/ontology/matching_strategies.py b/cognee/modules/ontology/matching_strategies.py new file mode 100644 index 000000000..c576bf6e2 --- /dev/null +++ b/cognee/modules/ontology/matching_strategies.py @@ -0,0 +1,55 @@ +import difflib +from abc import ABC, abstractmethod +from typing import List, Optional + + +class MatchingStrategy(ABC): + """Abstract base class for ontology entity matching strategies.""" + + @abstractmethod + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: + """Find the best match for a given name from a list of candidates. + + Args: + name: The name to match + candidates: List of candidate names to match against + + Returns: + The best matching candidate name, or None if no match found + """ + pass + + +class FuzzyMatchingStrategy(MatchingStrategy): + """Fuzzy matching strategy using difflib for approximate string matching.""" + + def __init__(self, cutoff: float = 0.8): + """Initialize fuzzy matching strategy. + + Args: + cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid + """ + self.cutoff = cutoff + + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: + """Find the closest fuzzy match for a given name. 
+ + Args: + name: The normalized name to match + candidates: List of normalized candidate names + + Returns: + The best matching candidate name, or None if no match meets the cutoff + """ + if not candidates: + return None + + # Check for exact match first + if name in candidates: + return name + + # Find fuzzy match + best_match = difflib.get_close_matches( + name, candidates, n=1, cutoff=self.cutoff + ) + return best_match[0] if best_match else None diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py similarity index 95% rename from cognee/modules/ontology/rdf_xml/OntologyResolver.py rename to cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 3c1a55b5a..d8de5794a 100644 --- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -12,6 +12,7 @@ from cognee.modules.ontology.exceptions import ( ) from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy logger = get_logger("OntologyAdapter") @@ -23,7 +24,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver): It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
""" - def __init__(self, ontology_file: Optional[str] = None): + def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None): + super().__init__(matching_strategy) self.ontology_file = ontology_file try: if ontology_file and os.path.exists(ontology_file): @@ -94,13 +96,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver): try: normalized_name = name.lower().replace(" ", "_").strip() possible_matches = list(self.lookup.get(category, {}).keys()) - if normalized_name in possible_matches: - return normalized_name - - best_match = difflib.get_close_matches( - normalized_name, possible_matches, n=1, cutoff=0.8 - ) - return best_match[0] if best_match else None + + return self.matching_strategy.find_match(normalized_name, possible_matches) except Exception as e: logger.error("Error in find_closest_match: %s", str(e)) raise FindClosestMatchError() from e diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 2ad32f308..22cbc70fe 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index ce69f9b0e..d2b4924c7 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk 
import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index e0a6f1402..051cb3556 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -1,6 +1,6 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode From 00c3ba3a0ccbad28b203938c5d8a47eb7594b492 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:24:39 +0200 Subject: [PATCH 03/22] ruff fix --- .../ontology/base_ontology_resolver.py | 10 ++++---- .../modules/ontology/matching_strategies.py | 24 +++++++++---------- .../rdf_xml/RDFLibOntologyResolver.py | 16 +++++++++---- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py index 86f51fcb7..7005e6981 100644 --- a/cognee/modules/ontology/base_ontology_resolver.py +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -7,12 +7,12 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyM class BaseOntologyResolver(ABC): """Abstract base class for ontology resolvers.""" - + def 
__init__(self, matching_strategy: Optional[MatchingStrategy] = None): """Initialize the ontology resolver with a matching strategy. - + Args: - matching_strategy: The strategy to use for entity matching. + matching_strategy: The strategy to use for entity matching. Defaults to FuzzyMatchingStrategy if None. """ self.matching_strategy = matching_strategy or FuzzyMatchingStrategy() @@ -35,6 +35,8 @@ class BaseOntologyResolver(ABC): @abstractmethod def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + ) -> Tuple[ + List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode] + ]: """Get a subgraph for the given node.""" pass diff --git a/cognee/modules/ontology/matching_strategies.py b/cognee/modules/ontology/matching_strategies.py index c576bf6e2..0e8ba7b96 100644 --- a/cognee/modules/ontology/matching_strategies.py +++ b/cognee/modules/ontology/matching_strategies.py @@ -5,15 +5,15 @@ from typing import List, Optional class MatchingStrategy(ABC): """Abstract base class for ontology entity matching strategies.""" - + @abstractmethod def find_match(self, name: str, candidates: List[str]) -> Optional[str]: """Find the best match for a given name from a list of candidates. - + Args: name: The name to match candidates: List of candidate names to match against - + Returns: The best matching candidate name, or None if no match found """ @@ -22,34 +22,32 @@ class MatchingStrategy(ABC): class FuzzyMatchingStrategy(MatchingStrategy): """Fuzzy matching strategy using difflib for approximate string matching.""" - + def __init__(self, cutoff: float = 0.8): """Initialize fuzzy matching strategy. 
- + Args: cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid """ self.cutoff = cutoff - + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: """Find the closest fuzzy match for a given name. - + Args: name: The normalized name to match candidates: List of normalized candidate names - + Returns: The best matching candidate name, or None if no match meets the cutoff """ if not candidates: return None - + # Check for exact match first if name in candidates: return name - + # Find fuzzy match - best_match = difflib.get_close_matches( - name, candidates, n=1, cutoff=self.cutoff - ) + best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff) return best_match[0] if best_match else None diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index d8de5794a..c6b3e22be 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -19,12 +19,16 @@ logger = get_logger("OntologyAdapter") class RDFLibOntologyResolver(BaseOntologyResolver): """RDFLib-based ontology resolver implementation. - + This implementation uses RDFLib to parse and work with RDF/OWL ontology files. It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
""" - - def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None): + + def __init__( + self, + ontology_file: Optional[str] = None, + matching_strategy: Optional[MatchingStrategy] = None, + ): super().__init__(matching_strategy) self.ontology_file = ontology_file try: @@ -96,7 +100,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver): try: normalized_name = name.lower().replace(" ", "_").strip() possible_matches = list(self.lookup.get(category, {}).keys()) - + return self.matching_strategy.find_match(normalized_name, possible_matches) except Exception as e: logger.error("Error in find_closest_match: %s", str(e)) @@ -111,7 +115,9 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + ) -> Tuple[ + List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode] + ]: nodes_set = set() edges: List[Tuple[str, str, str]] = [] visited = set() From 631b2f37ce0b8bad90bc5cdb3bcdc7d35d4c9f0e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:28:44 +0200 Subject: [PATCH 04/22] fix: deletes old ontology resolver instance --- cognee/tasks/graph/extract_graph_from_data.py | 2 +- cognee/tasks/graph/extract_graph_from_data_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 22cbc70fe..1ae28ca89 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -102,5 +102,5 @@ async def extract_graph_from_data( ] return await integrate_chunk_graphs( - data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver() + data_chunks, chunk_graphs, graph_model, 
ontology_adapter ) diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index d2b4924c7..5a4194fb1 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -44,5 +44,5 @@ async def extract_graph_from_data( data_chunks=data_chunks, chunk_graphs=chunk_graphs, graph_model=KnowledgeGraph, - ontology_adapter=ontology_adapter or OntologyResolver(), + ontology_adapter=ontology_adapter, ) From 75bef6e9299677f9e569f4ff2096b032578f7ae8 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:30:19 +0200 Subject: [PATCH 05/22] ruff fix --- cognee/tasks/graph/extract_graph_from_data.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 1ae28ca89..6681dd975 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -101,6 +101,4 @@ async def extract_graph_from_data( if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids ] - return await integrate_chunk_graphs( - data_chunks, chunk_graphs, graph_model, ontology_adapter - ) + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) From 6261fca0c4fe57ab9bfe6d66dc7a9c7e2550c608 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:44:04 +0200 Subject: [PATCH 06/22] feat: adds default ontology resolver --- .../graph/utils/expand_with_nodes_and_edges.py | 3 ++- .../ontology/get_default_ontology_resolver.py | 18 ++++++++++++++++++ .../modules/ontology/test_ontology_adapter.py | 7 ++++--- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py diff --git 
a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index bc6205d41..b3e8e8029 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -9,6 +9,7 @@ from cognee.modules.engine.utils import ( ) from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -320,7 +321,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = RDFLibOntologyResolver() + ontology_resolver = get_default_ontology_resolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py new file mode 100644 index 000000000..9dc5c59ba --- /dev/null +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -0,0 +1,18 @@ +from typing import Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + +def get_default_ontology_resolver(ontology_file: Optional[str] = None) -> BaseOntologyResolver: + """Get the default ontology resolver (RDFLib with fuzzy matching). 
+ + Args: + ontology_file: Optional path to ontology file + + Returns: + Default RDFLib ontology resolver with fuzzy matching strategy + """ + fuzzy_strategy = FuzzyMatchingStrategy() + return RDFLibOntologyResolver(ontology_file=ontology_file, matching_strategy=fuzzy_strategy) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 051cb3556..401c6dc02 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,12 +2,13 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = RDFLibOntologyResolver() + adapter = get_default_ontology_resolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -106,7 +107,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = RDFLibOntologyResolver() + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() @@ -164,7 +165,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - resolver = RDFLibOntologyResolver() + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() From 7c046eafab20e8714ee985bb1cf9873c4e9ae3bf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:28:11 +0200 Subject: [PATCH 07/22] feat: adds get_ontology_resolver + typed dict to hold params --- 
.../utils/expand_with_nodes_and_edges.py | 5 +-- .../ontology/get_default_ontology_resolver.py | 18 ---------- .../modules/ontology/get_ontology_resolver.py | 36 +++++++++++++++++++ cognee/modules/ontology/ontology_config.py | 16 +++++++++ 4 files changed, 55 insertions(+), 20 deletions(-) delete mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py create mode 100644 cognee/modules/ontology/get_ontology_resolver.py create mode 100644 cognee/modules/ontology/ontology_config.py diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index b3e8e8029..e18860744 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -9,7 +9,7 @@ from cognee.modules.engine.utils import ( ) from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -321,7 +321,8 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + ontology_resolver = config["resolver"] added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py deleted file mode 100644 index 9dc5c59ba..000000000 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Optional - -from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver 
-from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - -def get_default_ontology_resolver(ontology_file: Optional[str] = None) -> BaseOntologyResolver: - """Get the default ontology resolver (RDFLib with fuzzy matching). - - Args: - ontology_file: Optional path to ontology file - - Returns: - Default RDFLib ontology resolver with fuzzy matching strategy - """ - fuzzy_strategy = FuzzyMatchingStrategy() - return RDFLibOntologyResolver(ontology_file=ontology_file, matching_strategy=fuzzy_strategy) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py new file mode 100644 index 000000000..01377c162 --- /dev/null +++ b/cognee/modules/ontology/get_ontology_resolver.py @@ -0,0 +1,36 @@ +from typing import Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy +from cognee.modules.ontology.ontology_config import OntologyConfig + + +def get_ontology_resolver( + resolver: Optional[BaseOntologyResolver] = None, + matching_strategy: Optional[MatchingStrategy] = None, +) -> OntologyConfig: + """Get ontology resolver configuration with default or custom objects. 
+ + Args: + resolver: Optional pre-configured ontology resolver instance + matching_strategy: Optional matching strategy instance + + Returns: + Ontology configuration with default RDFLib resolver and fuzzy matching strategy, + or custom objects if provided + """ + config: OntologyConfig = {} + + if resolver is not None: + config["resolver"] = resolver + else: + default_strategy = matching_strategy or FuzzyMatchingStrategy() + config["resolver"] = RDFLibOntologyResolver( + ontology_file=None, matching_strategy=default_strategy + ) + + if matching_strategy is not None and resolver is None: + config["matching_strategy"] = matching_strategy + + return config diff --git a/cognee/modules/ontology/ontology_config.py b/cognee/modules/ontology/ontology_config.py new file mode 100644 index 000000000..e28da9f92 --- /dev/null +++ b/cognee/modules/ontology/ontology_config.py @@ -0,0 +1,16 @@ +from typing import TypedDict, Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.matching_strategies import MatchingStrategy + + +class OntologyConfig(TypedDict, total=False): + """Configuration for ontology resolver. 
+ + Attributes: + resolver: The ontology resolver instance to use + matching_strategy: The matching strategy to use + """ + + resolver: Optional[BaseOntologyResolver] + matching_strategy: Optional[MatchingStrategy] From 142d8068e12fc0380db4b596e3dc8af63dc1e88d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:30:20 +0200 Subject: [PATCH 08/22] chore: updates default empty ontology resolver tests --- .../unit/modules/ontology/test_ontology_adapter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 401c6dc02..74383d5df 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,13 +2,14 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = get_default_ontology_resolver() + config = get_ontology_resolver() + adapter = config["resolver"] adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -107,7 +108,8 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -165,7 +167,8 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new 
object.""" g = Graph() - resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + resolver = config["resolver"] resolver.graph = g resolver.build_lookup() From e815a3fc140d79fb0f6987b7ef730b2a4cd437b6 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:12:47 +0200 Subject: [PATCH 09/22] chore: changes ontology file path parameter to the new config structure --- cognee/api/v1/cognify/cognify.py | 14 +++++++++----- cognee/tasks/graph/extract_graph_from_data.py | 10 +++++++++- examples/python/ontology_demo_example.py | 9 ++++++++- examples/python/ontology_demo_example_2.py | 8 +++++++- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e933bafd8..f90e487e0 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import OntologyConfig +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -39,7 +40,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_file_path: Optional[str] = None, + ontology_config: OntologyConfig = None, vector_db_config: dict = None, graph_db_config: dict = None, run_in_background: bool = False, @@ -188,11 +189,14 @@ async def cognify( - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ + if ontology_config is None: + 
ontology_config = get_ontology_resolver() + if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) else: tasks = await get_default_tasks( - user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt + user, graph_model, chunker, chunk_size, ontology_config, custom_prompt ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -216,7 +220,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_file_path: Optional[str] = None, + ontology_config: OntologyConfig = get_ontology_resolver(), custom_prompt: Optional[str] = None, ) -> list[Task]: default_tasks = [ @@ -230,7 +234,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path), + ontology_config=ontology_config, custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the document chunks. 
diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 6681dd975..f0ef9c7f9 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,6 +4,8 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points +from cognee.modules.ontology.ontology_config import OntologyConfig +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -70,7 +72,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_adapter: RDFLibOntologyResolver = None, + ontology_config: OntologyConfig = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ @@ -101,4 +103,10 @@ async def extract_graph_from_data( if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids ] + # Extract resolver from config if provided, otherwise get default + if ontology_config is None: + ontology_config = get_ontology_resolver() + + ontology_adapter = ontology_config["resolver"] + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) diff --git a/examples/python/ontology_demo_example.py b/examples/python/ontology_demo_example.py index 8243faef5..ea1ab8b72 100644 --- a/examples/python/ontology_demo_example.py +++ b/examples/python/ontology_demo_example.py @@ -5,6 +5,8 @@ import cognee from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging +from cognee.modules.ontology.get_ontology_resolver import 
get_ontology_resolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver text_1 = """ 1. Audi @@ -60,7 +62,12 @@ async def main(): os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl" ) - await cognee.cognify(ontology_file_path=ontology_path) + # Create ontology config with custom ontology file + ontology_config = get_ontology_resolver( + resolver=RDFLibOntologyResolver(ontology_file=ontology_path) + ) + + await cognee.cognify(ontology_config=ontology_config) print("Knowledge with ontology created.") # Step 4: Query insights diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py index 22fb19862..e897da2e5 100644 --- a/examples/python/ontology_demo_example_2.py +++ b/examples/python/ontology_demo_example_2.py @@ -5,6 +5,8 @@ import os import textwrap from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver async def run_pipeline(ontology_path=None): @@ -17,7 +19,11 @@ async def run_pipeline(ontology_path=None): await cognee.add(scientific_papers_dir) - pipeline_run = await cognee.cognify(ontology_file_path=ontology_path) + ontology_config = get_ontology_resolver( + resolver=RDFLibOntologyResolver(ontology_file=ontology_path) + ) + + pipeline_run = await cognee.cognify(ontology_config=ontology_config) return pipeline_run From d2c7980e8317d7a3af79a2b3bbcd3fb77c786bbf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:14:39 +0200 Subject: [PATCH 10/22] chore: updates mutable default param --- cognee/api/v1/cognify/cognify.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 
f90e487e0..2cb844d12 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -220,9 +220,11 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = get_ontology_resolver(), + ontology_config: OntologyConfig = None, custom_prompt: Optional[str] = None, ) -> list[Task]: + if ontology_config is None: + ontology_config = get_ontology_resolver() default_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), From f4c70cc315dbb73aa8a2463ed8a085119034d535 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:39:43 +0200 Subject: [PATCH 11/22] feat: adds tests for the new logic + fixes the case when only matching is provided --- .../modules/ontology/get_ontology_resolver.py | 5 +- .../modules/ontology/test_ontology_adapter.py | 207 ++++++++++++++++++ 2 files changed, 209 insertions(+), 3 deletions(-) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py index 01377c162..d75928af9 100644 --- a/cognee/modules/ontology/get_ontology_resolver.py +++ b/cognee/modules/ontology/get_ontology_resolver.py @@ -24,13 +24,12 @@ def get_ontology_resolver( if resolver is not None: config["resolver"] = resolver + config["matching_strategy"] = matching_strategy or resolver.matching_strategy else: default_strategy = matching_strategy or FuzzyMatchingStrategy() config["resolver"] = RDFLibOntologyResolver( ontology_file=None, matching_strategy=default_strategy ) - - if matching_strategy is not None and resolver is None: - config["matching_strategy"] = matching_strategy + config["matching_strategy"] = default_strategy return config diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py 
b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 74383d5df..9b7eeeae0 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -177,3 +177,210 @@ def test_refresh_lookup_rdflib(): resolver.refresh_lookup() assert resolver.lookup is not original_lookup + + +def test_fuzzy_matching_strategy_exact_match(): + """Test FuzzyMatchingStrategy finds exact matches.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy() + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("audi", candidates) + assert result == "audi" + + +def test_fuzzy_matching_strategy_fuzzy_match(): + """Test FuzzyMatchingStrategy finds fuzzy matches.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy(cutoff=0.6) + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("audii", candidates) + assert result == "audi" + + +def test_fuzzy_matching_strategy_no_match(): + """Test FuzzyMatchingStrategy returns None when no match meets cutoff.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy(cutoff=0.9) + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("completely_different", candidates) + assert result is None + + +def test_fuzzy_matching_strategy_empty_candidates(): + """Test FuzzyMatchingStrategy handles empty candidates list.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy() + + result = strategy.find_match("audi", []) + assert result is None + + +def test_base_ontology_resolver_initialization(): + """Test BaseOntologyResolver initialization with default matching strategy.""" + from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver + from 
cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + class TestOntologyResolver(BaseOntologyResolver): + def build_lookup(self): + pass + + def refresh_lookup(self): + pass + + def find_closest_match(self, name, category): + return None + + def get_subgraph(self, node_name, node_type="individuals", directed=True): + return [], [], None + + resolver = TestOntologyResolver() + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) + + +def test_base_ontology_resolver_custom_matching_strategy(): + """Test BaseOntologyResolver initialization with custom matching strategy.""" + from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver + from cognee.modules.ontology.matching_strategies import MatchingStrategy + + class CustomMatchingStrategy(MatchingStrategy): + def find_match(self, name, candidates): + return "custom_match" + + class TestOntologyResolver(BaseOntologyResolver): + def build_lookup(self): + pass + + def refresh_lookup(self): + pass + + def find_closest_match(self, name, category): + return None + + def get_subgraph(self, node_name, node_type="individuals", directed=True): + return [], [], None + + custom_strategy = CustomMatchingStrategy() + resolver = TestOntologyResolver(matching_strategy=custom_strategy) + assert resolver.matching_strategy == custom_strategy + + +def test_ontology_config_structure(): + """Test OntologyConfig TypedDict structure.""" + from cognee.modules.ontology.ontology_config import OntologyConfig + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = RDFLibOntologyResolver() + matching_strategy = FuzzyMatchingStrategy() + + config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy} + + assert config["resolver"] == resolver + assert config["matching_strategy"] == matching_strategy + + +def 
test_get_ontology_resolver_default(): + """Test get_ontology_resolver returns default configuration.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + config = get_ontology_resolver() + + assert isinstance(config["resolver"], RDFLibOntologyResolver) + assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) + assert config["resolver"].matching_strategy == config["matching_strategy"] + + +def test_get_ontology_resolver_custom_resolver(): + """Test get_ontology_resolver with custom resolver.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") + config = get_ontology_resolver(resolver=custom_resolver) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_resolver.matching_strategy + assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) + + +def test_get_ontology_resolver_custom_matching_strategy(): + """Test get_ontology_resolver with custom matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) + config = get_ontology_resolver(matching_strategy=custom_strategy) + + assert isinstance(config["resolver"], RDFLibOntologyResolver) + assert config["matching_strategy"] == custom_strategy + assert config["resolver"].matching_strategy == custom_strategy + + 
+def test_get_ontology_resolver_both_custom(): + """Test get_ontology_resolver with both custom resolver and matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") + custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) + config = get_ontology_resolver(resolver=custom_resolver, matching_strategy=custom_strategy) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_strategy + + +def test_get_ontology_resolver_only_resolver_uses_resolver_strategy(): + """Test that when only resolver is passed, it uses the resolver's matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_strategy = FuzzyMatchingStrategy(cutoff=0.8) + custom_resolver = RDFLibOntologyResolver(matching_strategy=custom_strategy) + + config = get_ontology_resolver(resolver=custom_resolver) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_strategy + assert config["matching_strategy"] == custom_resolver.matching_strategy + + +def test_rdflib_ontology_resolver_uses_matching_strategy(): + """Test that RDFLibOntologyResolver uses the provided matching strategy.""" + from cognee.modules.ontology.matching_strategies import MatchingStrategy + + class TestMatchingStrategy(MatchingStrategy): + def find_match(self, name, candidates): + return "test_match" if candidates else None + + ns = Namespace("http://example.org/test#") + g = Graph() + g.add((ns.Car, RDF.type, OWL.Class)) + g.add((ns.Audi, RDF.type, ns.Car)) + + 
resolver = RDFLibOntologyResolver(matching_strategy=TestMatchingStrategy()) + resolver.graph = g + resolver.build_lookup() + + result = resolver.find_closest_match("Audi", "individuals") + assert result == "test_match" + + +def test_rdflib_ontology_resolver_default_matching_strategy(): + """Test that RDFLibOntologyResolver uses FuzzyMatchingStrategy by default.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = RDFLibOntologyResolver() + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) From 94373e5a01d948af80756987f3be990ad9652f0e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:24:23 +0200 Subject: [PATCH 12/22] feat: adds new config structure based on requirements --- cognee/api/v1/cognify/cognify.py | 23 +++-- .../utils/expand_with_nodes_and_edges.py | 8 +- .../ontology/get_default_ontology_resolver.py | 6 ++ .../modules/ontology/get_ontology_resolver.py | 35 -------- cognee/modules/ontology/ontology_config.py | 18 ++-- .../rdf_xml/RDFLibOntologyResolver.py | 2 +- cognee/tasks/graph/extract_graph_from_data.py | 22 ++--- .../modules/ontology/test_ontology_adapter.py | 90 +++++-------------- examples/python/ontology_demo_example.py | 14 +-- examples/python/ontology_demo_example_2.py | 12 +-- 10 files changed, 85 insertions(+), 145 deletions(-) create mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py delete mode 100644 cognee/modules/ontology/get_ontology_resolver.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 2cb844d12..f4bd5d1b4 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,8 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from 
cognee.modules.ontology.ontology_config import OntologyConfig -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.ontology_config import Config +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -40,7 +40,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = None, + config: Config = None, vector_db_config: dict = None, graph_db_config: dict = None, run_in_background: bool = False, @@ -101,8 +101,6 @@ async def cognify( Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Default limits: ~512-8192 tokens depending on models. Smaller chunks = more granular but potentially fragmented knowledge. - ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types. - Useful for specialized fields like medical or legal documents. vector_db_config: Custom vector database configuration for embeddings storage. graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. 
@@ -189,14 +187,14 @@ async def cognify( - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) else: tasks = await get_default_tasks( - user, graph_model, chunker, chunk_size, ontology_config, custom_prompt + user, graph_model, chunker, chunk_size, config, custom_prompt ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -220,11 +218,12 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = None, + config: Config = None, custom_prompt: Optional[str] = None, ) -> list[Task]: - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + default_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), @@ -236,7 +235,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_config=ontology_config, + config=config, custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the document chunks. 
diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index e18860744..ef72cd0e1 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -7,9 +7,10 @@ from cognee.modules.engine.utils import ( generate_node_id, generate_node_name, ) +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -278,7 +279,7 @@ def _process_graph_edges( def expand_with_nodes_and_edges( data_chunks: list[DocumentChunk], chunk_graphs: list[KnowledgeGraph], - ontology_resolver: RDFLibOntologyResolver = None, + ontology_resolver: BaseOntologyResolver = None, existing_edges_map: Optional[dict[str, bool]] = None, ): """ @@ -321,8 +322,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - config = get_ontology_resolver() - ontology_resolver = config["resolver"] + ontology_resolver = get_default_ontology_resolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py new file mode 100644 index 000000000..ae10fbde5 --- /dev/null +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -0,0 +1,6 @@ +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + +def get_default_ontology_resolver() -> RDFLibOntologyResolver: + return 
RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy()) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py deleted file mode 100644 index d75928af9..000000000 --- a/cognee/modules/ontology/get_ontology_resolver.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy -from cognee.modules.ontology.ontology_config import OntologyConfig - - -def get_ontology_resolver( - resolver: Optional[BaseOntologyResolver] = None, - matching_strategy: Optional[MatchingStrategy] = None, -) -> OntologyConfig: - """Get ontology resolver configuration with default or custom objects. - - Args: - resolver: Optional pre-configured ontology resolver instance - matching_strategy: Optional matching strategy instance - - Returns: - Ontology configuration with default RDFLib resolver and fuzzy matching strategy, - or custom objects if provided - """ - config: OntologyConfig = {} - - if resolver is not None: - config["resolver"] = resolver - config["matching_strategy"] = matching_strategy or resolver.matching_strategy - else: - default_strategy = matching_strategy or FuzzyMatchingStrategy() - config["resolver"] = RDFLibOntologyResolver( - ontology_file=None, matching_strategy=default_strategy - ) - config["matching_strategy"] = default_strategy - - return config diff --git a/cognee/modules/ontology/ontology_config.py b/cognee/modules/ontology/ontology_config.py index e28da9f92..397411edc 100644 --- a/cognee/modules/ontology/ontology_config.py +++ b/cognee/modules/ontology/ontology_config.py @@ -5,12 +5,20 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy class OntologyConfig(TypedDict, total=False): - 
"""Configuration for ontology resolver. + """Configuration containing ontology resolver. Attributes: - resolver: The ontology resolver instance to use - matching_strategy: The matching strategy to use + ontology_resolver: The ontology resolver instance to use """ - resolver: Optional[BaseOntologyResolver] - matching_strategy: Optional[MatchingStrategy] + ontology_resolver: Optional[BaseOntologyResolver] + + +class Config(TypedDict, total=False): + """Top-level configuration dictionary. + + Attributes: + ontology_config: Configuration containing ontology resolver + """ + + ontology_config: Optional[OntologyConfig] diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index c6b3e22be..2a7a03751 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -28,7 +28,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver): self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None, - ): + ) -> None: super().__init__(matching_strategy) self.ontology_file = ontology_file try: diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index f0ef9c7f9..7c049546c 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,8 +4,8 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.ontology_config import OntologyConfig -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.ontology_config import Config +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from 
cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -26,7 +26,7 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_adapter: RDFLibOntologyResolver, + ontology_resolver: RDFLibOntologyResolver, ) -> List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" @@ -38,9 +38,9 @@ async def integrate_chunk_graphs( ) if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): raise InvalidGraphModelError(graph_model) - if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"): + if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"): raise InvalidOntologyAdapterError( - type(ontology_adapter).__name__ if ontology_adapter else "None" + type(ontology_resolver).__name__ if ontology_resolver else "None" ) graph_engine = await get_graph_engine() @@ -57,7 +57,7 @@ async def integrate_chunk_graphs( ) graph_nodes, graph_edges = expand_with_nodes_and_edges( - data_chunks, chunk_graphs, ontology_adapter, existing_edges_map + data_chunks, chunk_graphs, ontology_resolver, existing_edges_map ) if len(graph_nodes) > 0: @@ -72,7 +72,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_config: OntologyConfig = None, + config: Config = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ @@ -104,9 +104,9 @@ async def extract_graph_from_data( ] # Extract resolver from config if provided, otherwise get default - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} - ontology_adapter = ontology_config["resolver"] + ontology_resolver = config["ontology_config"]["ontology_resolver"] - return await 
integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 9b7eeeae0..88e9b314d 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,13 +2,13 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - config = get_ontology_resolver() + config = get_default_ontology_resolver() adapter = config["resolver"] adapter.build_lookup() @@ -108,7 +108,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - config = get_ontology_resolver() + config = get_default_ontology_resolver() resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -167,7 +167,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - config = get_ontology_resolver() + config = get_default_ontology_resolver() resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -272,89 +272,47 @@ def test_base_ontology_resolver_custom_matching_strategy(): def test_ontology_config_structure(): - """Test OntologyConfig TypedDict structure.""" - from cognee.modules.ontology.ontology_config import OntologyConfig + """Test TypedDict structure for 
ontology configuration.""" + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy resolver = RDFLibOntologyResolver() matching_strategy = FuzzyMatchingStrategy() - config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy} + config: Config = {"ontology_config": {"ontology_resolver": resolver}} - assert config["resolver"] == resolver - assert config["matching_strategy"] == matching_strategy + assert config["ontology_config"]["ontology_resolver"] == resolver def test_get_ontology_resolver_default(): """Test get_ontology_resolver returns default configuration.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - config = get_ontology_resolver() + config: Config = get_default_ontology_resolver() - assert isinstance(config["resolver"], RDFLibOntologyResolver) - assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) - assert config["resolver"].matching_strategy == config["matching_strategy"] + assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) + assert isinstance( + config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy + ) -def test_get_ontology_resolver_custom_resolver(): - """Test get_ontology_resolver with custom resolver.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +def test_get_default_ontology_resolver(): + """Test get_default_ontology_resolver returns default configuration.""" + from 
cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") - config = get_ontology_resolver(resolver=custom_resolver) + config: Config = get_default_ontology_resolver() - assert config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_resolver.matching_strategy - assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) - - -def test_get_ontology_resolver_custom_matching_strategy(): - """Test get_ontology_resolver with custom matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) - config = get_ontology_resolver(matching_strategy=custom_strategy) - - assert isinstance(config["resolver"], RDFLibOntologyResolver) - assert config["matching_strategy"] == custom_strategy - assert config["resolver"].matching_strategy == custom_strategy - - -def test_get_ontology_resolver_both_custom(): - """Test get_ontology_resolver with both custom resolver and matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") - custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) - config = get_ontology_resolver(resolver=custom_resolver, matching_strategy=custom_strategy) - - assert 
config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_strategy - - -def test_get_ontology_resolver_only_resolver_uses_resolver_strategy(): - """Test that when only resolver is passed, it uses the resolver's matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_strategy = FuzzyMatchingStrategy(cutoff=0.8) - custom_resolver = RDFLibOntologyResolver(matching_strategy=custom_strategy) - - config = get_ontology_resolver(resolver=custom_resolver) - - assert config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_strategy - assert config["matching_strategy"] == custom_resolver.matching_strategy + assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) + assert isinstance( + config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy + ) def test_rdflib_ontology_resolver_uses_matching_strategy(): diff --git a/examples/python/ontology_demo_example.py b/examples/python/ontology_demo_example.py index ea1ab8b72..5b18e6ed4 100644 --- a/examples/python/ontology_demo_example.py +++ b/examples/python/ontology_demo_example.py @@ -5,8 +5,8 @@ import cognee from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import Config text_1 = """ 1. 
Audi @@ -62,12 +62,14 @@ async def main(): os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl" ) - # Create ontology config with custom ontology file - ontology_config = get_ontology_resolver( - resolver=RDFLibOntologyResolver(ontology_file=ontology_path) - ) + # Create full config structure manually + config: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path) + } + } - await cognee.cognify(ontology_config=ontology_config) + await cognee.cognify(config=config) print("Knowledge with ontology created.") # Step 4: Query insights diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py index e897da2e5..01bcd9ae4 100644 --- a/examples/python/ontology_demo_example_2.py +++ b/examples/python/ontology_demo_example_2.py @@ -5,8 +5,8 @@ import os import textwrap from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import Config async def run_pipeline(ontology_path=None): @@ -19,11 +19,13 @@ async def run_pipeline(ontology_path=None): await cognee.add(scientific_papers_dir) - ontology_config = get_ontology_resolver( - resolver=RDFLibOntologyResolver(ontology_file=ontology_path) - ) + config: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path) + } + } - pipeline_run = await cognee.cognify(ontology_config=ontology_config) + pipeline_run = await cognee.cognify(config=config) return pipeline_run From 7051367832840bb88155378bbe94cbac612b7ca1 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:02:26 +0200 Subject: [PATCH 13/22] fix: fixes linting --- 
.../tests/unit/modules/ontology/test_ontology_adapter.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 88e9b314d..a1fc4a4eb 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -8,8 +8,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import get_default_on def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - config = get_default_ontology_resolver() - adapter = config["resolver"] + adapter = get_default_ontology_resolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -108,8 +107,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - config = get_default_ontology_resolver() - resolver = config["resolver"] + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() @@ -277,8 +275,8 @@ def test_ontology_config_structure(): from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - resolver = RDFLibOntologyResolver() matching_strategy = FuzzyMatchingStrategy() + resolver = RDFLibOntologyResolver(matching_strategy=matching_strategy) config: Config = {"ontology_config": {"ontology_resolver": resolver}} From 980c3e3677ba7aac22799206beace582c340b62c Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:03:32 +0200 Subject: [PATCH 14/22] fix: fixes unit tests --- .../modules/ontology/test_ontology_adapter.py | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py 
b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index a1fc4a4eb..d40f1369a 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -165,8 +165,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - config = get_default_ontology_resolver() - resolver = config["resolver"] + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() @@ -284,33 +283,25 @@ def test_ontology_config_structure(): def test_get_ontology_resolver_default(): - """Test get_ontology_resolver returns default configuration.""" - from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver - from cognee.modules.ontology.ontology_config import Config + """Test get_default_ontology_resolver returns default resolver.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - config: Config = get_default_ontology_resolver() + resolver = get_default_ontology_resolver() - assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) - assert isinstance( - config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy - ) + assert isinstance(resolver, RDFLibOntologyResolver) + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) def test_get_default_ontology_resolver(): - """Test get_default_ontology_resolver returns default configuration.""" - from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver - from cognee.modules.ontology.ontology_config import Config + """Test get_default_ontology_resolver returns default resolver.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import 
FuzzyMatchingStrategy - config: Config = get_default_ontology_resolver() + resolver = get_default_ontology_resolver() - assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) - assert isinstance( - config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy - ) + assert isinstance(resolver, RDFLibOntologyResolver) + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) def test_rdflib_ontology_resolver_uses_matching_strategy(): From 7c33418ae973fad68ac6ac3bda4ef88d15372bbb Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:24:15 +0200 Subject: [PATCH 15/22] chore: adds and updates docstrings --- .../utils/expand_with_nodes_and_edges.py | 2 +- cognee/tasks/graph/extract_graph_from_data.py | 25 ++++++++++++++++--- .../tasks/graph/extract_graph_from_data_v2.py | 18 ++++++++++--- .../modules/ontology/test_ontology_adapter.py | 10 ++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index ef72cd0e1..5b603c163 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -298,7 +298,7 @@ def expand_with_nodes_and_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) extracted from the chunk content. - ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and + ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and types against an ontology. If None, a default RDFLibOntologyResolver is created. Defaults to None. 
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 7c049546c..5c3b11821 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -6,7 +6,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, @@ -26,9 +26,28 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_resolver: RDFLibOntologyResolver, + ontology_resolver: BaseOntologyResolver, ) -> List[DocumentChunk]: - """Updates DocumentChunk objects, integrates data points and edges into databases.""" + """Integrate chunk graphs with ontology validation and store in databases. + + This function processes document chunks and their associated knowledge graphs, + validates entities against an ontology resolver, and stores the integrated + data points and edges in the configured databases. 
+ + Args: + data_chunks: List of document chunks containing source data + chunk_graphs: List of knowledge graphs corresponding to each chunk + graph_model: Pydantic model class for graph data validation + ontology_resolver: Resolver for validating entities against ontology + + Returns: + List of updated DocumentChunk objects with integrated data + + Raises: + InvalidChunkGraphInputError: If input validation fails + InvalidGraphModelError: If graph model validation fails + InvalidOntologyAdapterError: If ontology resolver validation fails + """ if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list): raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.") diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index 5a4194fb1..0a8869784 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, @@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs async def extract_graph_from_data( data_chunks: List[DocumentChunk], n_rounds: int = 2, - ontology_adapter: RDFLibOntologyResolver = None, + ontology_adapter: BaseOntologyResolver = None, ) -> List[DocumentChunk]: - """Extract and update graph data from document chunks in multiple steps.""" + """Extract and update graph data from document chunks using cascade extraction. 
+ + This function performs multi-step graph extraction from document chunks, + using cascade extraction techniques to build comprehensive knowledge graphs. + + Args: + data_chunks: List of document chunks to process + n_rounds: Number of extraction rounds to perform (default: 2) + ontology_adapter: Resolver for validating entities against ontology + + Returns: + List of updated DocumentChunk objects with extracted graph data + """ chunk_nodes = await asyncio.gather( *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks] ) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index d40f1369a..4757e2595 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -6,7 +6,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import get_default_on def test_ontology_adapter_initialization_success(): - """Test successful initialization of OntologyAdapter.""" + """Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver.""" adapter = get_default_ontology_resolver() adapter.build_lookup() @@ -104,7 +104,7 @@ def test_find_closest_match_no_match(): def test_get_subgraph_no_match_rdflib(): - """Test get_subgraph returns empty results for a non-existent node.""" + """Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver.""" g = Graph() resolver = get_default_ontology_resolver() @@ -162,7 +162,7 @@ def test_get_subgraph_success_rdflib(): def test_refresh_lookup_rdflib(): - """Test that refresh_lookup rebuilds the lookup dict into a new object.""" + """Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver.""" g = Graph() resolver = get_default_ontology_resolver() @@ -283,7 +283,7 @@ def test_ontology_config_structure(): def test_get_ontology_resolver_default(): - """Test 
get_default_ontology_resolver returns default resolver.""" + """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy @@ -294,7 +294,7 @@ def test_get_ontology_resolver_default(): def test_get_default_ontology_resolver(): - """Test get_default_ontology_resolver returns default resolver.""" + """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy From 9ee93a4260698a83ebf1fce992894b99fada8351 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:25:00 +0200 Subject: [PATCH 16/22] ruff fix --- cognee/tasks/graph/extract_graph_from_data.py | 8 ++++---- cognee/tasks/graph/extract_graph_from_data_v2.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 5c3b11821..391c6fabe 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -29,20 +29,20 @@ async def integrate_chunk_graphs( ontology_resolver: BaseOntologyResolver, ) -> List[DocumentChunk]: """Integrate chunk graphs with ontology validation and store in databases. - + This function processes document chunks and their associated knowledge graphs, validates entities against an ontology resolver, and stores the integrated data points and edges in the configured databases. 
- + Args: data_chunks: List of document chunks containing source data chunk_graphs: List of knowledge graphs corresponding to each chunk graph_model: Pydantic model class for graph data validation ontology_resolver: Resolver for validating entities against ontology - + Returns: List of updated DocumentChunk objects with integrated data - + Raises: InvalidChunkGraphInputError: If input validation fails InvalidGraphModelError: If graph model validation fails diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index 0a8869784..8cea6602e 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -20,15 +20,15 @@ async def extract_graph_from_data( ontology_adapter: BaseOntologyResolver = None, ) -> List[DocumentChunk]: """Extract and update graph data from document chunks using cascade extraction. - + This function performs multi-step graph extraction from document chunks, using cascade extraction techniques to build comprehensive knowledge graphs. 
- + Args: data_chunks: List of document chunks to process n_rounds: Number of extraction rounds to perform (default: 2) ontology_adapter: Resolver for validating entities against ontology - + Returns: List of updated DocumentChunk objects with extracted graph data """ From 2f225c9e036c6444da73d641f89fe63e96c5d438 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:54:33 +0200 Subject: [PATCH 17/22] feat: adds ontology resolver env handling --- cognee/api/v1/cognify/cognify.py | 38 ++++++++++++++-- .../ontology/get_default_ontology_resolver.py | 37 ++++++++++++++- .../modules/ontology/ontology_env_config.py | 45 +++++++++++++++++++ cognee/tasks/graph/extract_graph_from_data.py | 22 ++++++++- 4 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 cognee/modules/ontology/ontology_env_config.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index f4bd5d1b4..1292d243a 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from typing import Union, Optional from uuid import UUID +from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.shared.logging_utils import get_logger from cognee.shared.data_models import KnowledgeGraph from cognee.infrastructure.llm import get_max_chunk_tokens @@ -11,7 +12,10 @@ from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.ontology.ontology_config import Config -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import ( + get_default_ontology_resolver, + get_ontology_resolver_from_env, +) from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -188,7 +192,21 @@ 
async def cognify( - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ if config is None: - config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) @@ -222,7 +240,21 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's custom_prompt: Optional[str] = None, ) -> list[Task]: if config is None: - config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } default_tasks = [ Task(classify_documents), diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py index ae10fbde5..f9aebe59a 100644 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -1,6 +1,41 @@ +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy -def 
get_default_ontology_resolver() -> RDFLibOntologyResolver: +def get_default_ontology_resolver() -> BaseOntologyResolver: return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy()) + + +def get_ontology_resolver_from_env( + ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = "" +) -> BaseOntologyResolver: + """ + Create and return an ontology resolver instance based on environment parameters. + + Currently, this function supports only the RDFLib-based ontology resolver + with a fuzzy matching strategy. + + Args: + ontology_resolver (str): The ontology resolver type to use. + Supported value: "rdflib". + matching_strategy (str): The matching strategy to apply. + Supported value: "fuzzy". + ontology_file_path (str): Path to the ontology file required for the resolver. + + Returns: + BaseOntologyResolver: An instance of the requested ontology resolver. + + Raises: + EnvironmentError: If the provided resolver or strategy is unsupported, + or if required parameters are missing. + """ + if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path: + return RDFLibOntologyResolver( + matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path + ) + else: + raise EnvironmentError( + f"Unsupported ontology resolver: {ontology_resolver}. " + f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy." 
+ ) diff --git a/cognee/modules/ontology/ontology_env_config.py b/cognee/modules/ontology/ontology_env_config.py new file mode 100644 index 000000000..a351b35e7 --- /dev/null +++ b/cognee/modules/ontology/ontology_env_config.py @@ -0,0 +1,45 @@ +"""This module contains the configuration for ontology handling.""" + +from functools import lru_cache +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class OntologyEnvConfig(BaseSettings): + """ + Represents the configuration for ontology handling, including parameters for + ontology file storage and resolution/matching strategies. + + Public methods: + - to_dict + + Instance variables: + - ontology_resolver + - matching_strategy + - ontology_file_path + - model_config + """ + + ontology_resolver: str = "rdflib" + matching_strategy: str = "fuzzy" + ontology_file_path: str = "" + + model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True) + + def to_dict(self) -> dict: + """ + Return the configuration as a dictionary. + """ + return { + "ontology_resolver": self.ontology_resolver, + "matching_strategy": self.matching_strategy, + "ontology_file_path": self.ontology_file_path, + } + + +@lru_cache +def get_ontology_env_config(): + """ + Retrieve the ontology configuration. This function utilizes caching to return a + singleton instance of the OntologyEnvConfig class for efficiency.
+ """ + return OntologyEnvConfig() diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 391c6fabe..e4dafe4e7 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -3,9 +3,13 @@ from typing import Type, List, Optional from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.tasks.storage.add_data_points import add_data_points from cognee.modules.ontology.ontology_config import Config -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import ( + get_default_ontology_resolver, + get_ontology_resolver_from_env, +) from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -124,7 +128,21 @@ async def extract_graph_from_data( # Extract resolver from config if provided, otherwise get default if config is None: - config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } ontology_resolver = config["ontology_config"]["ontology_resolver"] From 57f864a58f2ba68e167db38b99cad58eb4359c16 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:02:25 +0200 Subject: [PATCH 18/22] feat: adds tests 
for the env settings --- .../modules/ontology/test_ontology_adapter.py | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 4757e2595..efb472c1e 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -331,3 +331,173 @@ def test_rdflib_ontology_resolver_default_matching_strategy(): resolver = RDFLibOntologyResolver() assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) + + +def test_get_ontology_resolver_from_env_success(): + """Test get_ontology_resolver_from_env returns correct resolver with valid parameters.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + assert isinstance(resolver, RDFLibOntologyResolver) + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) + assert resolver.ontology_file == "/test/path.owl" + + +def test_get_ontology_resolver_from_env_unsupported_resolver(): + """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported resolver.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="unsupported", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + assert "Unsupported ontology resolver: unsupported" in str(exc_info.value) + assert "Supported resolvers are: RdfLib with FuzzyMatchingStrategy" in str(exc_info.value) + + 
+def test_get_ontology_resolver_from_env_unsupported_strategy(): + """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported strategy.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="unsupported", + ontology_file_path="/test/path.owl" + ) + + assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_empty_file_path(): + """Test get_ontology_resolver_from_env raises EnvironmentError for empty file path.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="fuzzy", + ontology_file_path="" + ) + + assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_none_file_path(): + """Test get_ontology_resolver_from_env raises EnvironmentError for None file path.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="fuzzy", + ontology_file_path=None + ) + + assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_empty_resolver(): + """Test get_ontology_resolver_from_env raises EnvironmentError for empty resolver.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + assert "Unsupported ontology 
resolver:" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_empty_strategy(): + """Test get_ontology_resolver_from_env raises EnvironmentError for empty strategy.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="", + ontology_file_path="/test/path.owl" + ) + + assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_default_parameters(): + """Test get_ontology_resolver_from_env with default empty parameters raises EnvironmentError.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError) as exc_info: + get_ontology_resolver_from_env() + + assert "Unsupported ontology resolver:" in str(exc_info.value) + + +def test_get_ontology_resolver_from_env_case_sensitivity(): + """Test get_ontology_resolver_from_env is case sensitive.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + with pytest.raises(EnvironmentError): + get_ontology_resolver_from_env( + ontology_resolver="RDFLIB", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + with pytest.raises(EnvironmentError): + get_ontology_resolver_from_env( + ontology_resolver="RdfLib", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + +def test_get_ontology_resolver_from_env_with_actual_file(): + """Test get_ontology_resolver_from_env works with actual file path.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = get_ontology_resolver_from_env( 
+ ontology_resolver="rdflib", + matching_strategy="fuzzy", + ontology_file_path="/path/to/ontology.owl" + ) + + assert isinstance(resolver, RDFLibOntologyResolver) + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) + assert resolver.ontology_file == "/path/to/ontology.owl" + + +def test_get_ontology_resolver_from_env_resolver_functionality(): + """Test that resolver created from env function works correctly.""" + from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env + + resolver = get_ontology_resolver_from_env( + ontology_resolver="rdflib", + matching_strategy="fuzzy", + ontology_file_path="/test/path.owl" + ) + + resolver.build_lookup() + assert isinstance(resolver.lookup, dict) + + result = resolver.find_closest_match("test", "individuals") + assert result is None # Should return None for non-existent entity + + nodes, relationships, start_node = resolver.get_subgraph("test", "individuals") + assert nodes == [] + assert relationships == [] + assert start_node is None From 765834c0a966688a55f6d416af361fcb5a17a19a Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:39:08 +0200 Subject: [PATCH 19/22] ruff formatting --- .../modules/ontology/test_ontology_adapter.py | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index efb472c1e..dfab79732 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -340,9 +340,7 @@ def test_get_ontology_resolver_from_env_success(): from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy resolver = get_ontology_resolver_from_env( - ontology_resolver="rdflib", - matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + 
ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl" ) assert isinstance(resolver, RDFLibOntologyResolver) @@ -358,7 +356,7 @@ def test_get_ontology_resolver_from_env_unsupported_resolver(): get_ontology_resolver_from_env( ontology_resolver="unsupported", matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + ontology_file_path="/test/path.owl", ) assert "Unsupported ontology resolver: unsupported" in str(exc_info.value) @@ -373,7 +371,7 @@ def test_get_ontology_resolver_from_env_unsupported_strategy(): get_ontology_resolver_from_env( ontology_resolver="rdflib", matching_strategy="unsupported", - ontology_file_path="/test/path.owl" + ontology_file_path="/test/path.owl", ) assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) @@ -385,9 +383,7 @@ def test_get_ontology_resolver_from_env_empty_file_path(): with pytest.raises(EnvironmentError) as exc_info: get_ontology_resolver_from_env( - ontology_resolver="rdflib", - matching_strategy="fuzzy", - ontology_file_path="" + ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="" ) assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) @@ -399,9 +395,7 @@ def test_get_ontology_resolver_from_env_none_file_path(): with pytest.raises(EnvironmentError) as exc_info: get_ontology_resolver_from_env( - ontology_resolver="rdflib", - matching_strategy="fuzzy", - ontology_file_path=None + ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=None ) assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) @@ -413,9 +407,7 @@ def test_get_ontology_resolver_from_env_empty_resolver(): with pytest.raises(EnvironmentError) as exc_info: get_ontology_resolver_from_env( - ontology_resolver="", - matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + ontology_resolver="", matching_strategy="fuzzy", ontology_file_path="/test/path.owl" ) assert "Unsupported ontology resolver:" in str(exc_info.value) @@ 
-427,9 +419,7 @@ def test_get_ontology_resolver_from_env_empty_strategy(): with pytest.raises(EnvironmentError) as exc_info: get_ontology_resolver_from_env( - ontology_resolver="rdflib", - matching_strategy="", - ontology_file_path="/test/path.owl" + ontology_resolver="rdflib", matching_strategy="", ontology_file_path="/test/path.owl" ) assert "Unsupported ontology resolver: rdflib" in str(exc_info.value) @@ -453,14 +443,14 @@ def test_get_ontology_resolver_from_env_case_sensitivity(): get_ontology_resolver_from_env( ontology_resolver="RDFLIB", matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + ontology_file_path="/test/path.owl", ) with pytest.raises(EnvironmentError): get_ontology_resolver_from_env( ontology_resolver="RdfLib", matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + ontology_file_path="/test/path.owl", ) @@ -473,7 +463,7 @@ def test_get_ontology_resolver_from_env_with_actual_file(): resolver = get_ontology_resolver_from_env( ontology_resolver="rdflib", matching_strategy="fuzzy", - ontology_file_path="/path/to/ontology.owl" + ontology_file_path="/path/to/ontology.owl", ) assert isinstance(resolver, RDFLibOntologyResolver) @@ -486,9 +476,7 @@ def test_get_ontology_resolver_from_env_resolver_functionality(): from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env resolver = get_ontology_resolver_from_env( - ontology_resolver="rdflib", - matching_strategy="fuzzy", - ontology_file_path="/test/path.owl" + ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl" ) resolver.build_lookup() From 54cf0967cc2d7d360ffb8b70046e3f72fec6496f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:46:01 +0200 Subject: [PATCH 20/22] chore: updates env template --- .env.template | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.env.template b/.env.template index 781e82428..ddcd41a6c 100644 --- 
a/.env.template +++ b/.env.template @@ -116,7 +116,15 @@ VECTOR_DB_PROVIDER="lancedb" VECTOR_DB_URL= VECTOR_DB_KEY= +################################################################################ +# 🧩 Ontology resolver settings +################################################################################ +# -- Ontology resolver params -------------------------------------- +# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and owl file to read ontology structures +# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold +# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty +# To add ontology resolvers, either set them as it is set in ontology_example or add full_path and settings as envs. ################################################################################ # 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS From ea487d2ca3715c4233674b62e66ab391c8d1c19d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:56:11 +0200 Subject: [PATCH 21/22] adds: ads default handling in expand_with_nodes_and_edges --- .../graph/utils/expand_with_nodes_and_edges.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 5b603c163..39c1d4bd1 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,9 +8,11 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_default_ontology_resolver import
get_default_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver, \ + get_ontology_resolver_from_env def _create_node_key(node_id: str, category: str) -> str: @@ -322,7 +324,15 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = get_default_ontology_resolver() + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict()) + else: + ontology_resolver = get_default_ontology_resolver() added_nodes_map = {} added_ontology_nodes_map = {} From 28ed9c4c736fbe2e07f11e3f36f5adc6f9fcbc4f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:58:18 +0200 Subject: [PATCH 22/22] ruff formatting --- .../graph/utils/expand_with_nodes_and_edges.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 39c1d4bd1..3b01f5af4 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -11,8 +11,10 @@ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver, \ - get_ontology_resolver_from_env +from cognee.modules.ontology.get_default_ontology_resolver import ( + get_default_ontology_resolver, + get_ontology_resolver_from_env, +) def _create_node_key(node_id: str, 
category: str) -> str: @@ -326,9 +328,9 @@ def expand_with_nodes_and_edges( if ontology_resolver is None: ontology_config = get_ontology_env_config() if ( - ontology_config.ontology_file_path - and ontology_config.ontology_resolver - and ontology_config.matching_strategy + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy ): ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict()) else: