From 61e47c0b2f1a1b5b0013e17707d6b2220862ff8f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:39:02 +0100 Subject: [PATCH 1/4] feat: adds multifile ontology handling to OntologyResolver --- .../ontology/get_default_ontology_resolver.py | 10 ++++- .../rdf_xml/RDFLibOntologyResolver.py | 42 +++++++++++++++---- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py index f9aebe59a..6857c339b 100644 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -21,7 +21,8 @@ def get_ontology_resolver_from_env( Supported value: "rdflib". matching_strategy (str): The matching strategy to apply. Supported value: "fuzzy". - ontology_file_path (str): Path to the ontology file required for the resolver. + ontology_file_path (str): Path to the ontology file(s) required for the resolver. + Can be a single path or comma-separated paths for multiple files. Returns: BaseOntologyResolver: An instance of the requested ontology resolver. @@ -31,8 +32,13 @@ def get_ontology_resolver_from_env( or if required parameters are missing. """ if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path: + if "," in ontology_file_path: + file_paths = [path.strip() for path in ontology_file_path.split(",")] + else: + file_paths = ontology_file_path + return RDFLibOntologyResolver( - matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path + matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths ) else: raise EnvironmentError( diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 2a7a03751..657ebb667 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -2,7 +2,7 @@ import os import difflib from cognee.shared.logging_utils import get_logger from collections import deque -from typing import List, Tuple, Dict, Optional, Any +from typing import List, Tuple, Dict, Optional, Any, Union from rdflib import Graph, URIRef, RDF, RDFS, OWL from cognee.modules.ontology.exceptions import ( @@ -26,22 +26,46 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def __init__( self, - ontology_file: Optional[str] = None, + ontology_file: Optional[Union[str, List[str]]] = None, matching_strategy: Optional[MatchingStrategy] = None, ) -> None: super().__init__(matching_strategy) self.ontology_file = ontology_file try: - if ontology_file and os.path.exists(ontology_file): + # Convert single file to list for uniform processing + files_to_load = [] + if ontology_file is not None: + if isinstance(ontology_file, str): + files_to_load = [ontology_file] + elif isinstance(ontology_file, list): + files_to_load = ontology_file + else: + raise ValueError(f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}") + + # Load ontology files into a single graph + if files_to_load: self.graph = Graph() - self.graph.parse(ontology_file) - logger.info("Ontology loaded successfully from file: %s", ontology_file) + loaded_files = [] + for file_path in files_to_load: + if os.path.exists(file_path): + self.graph.parse(file_path) + loaded_files.append(file_path) + logger.info("Ontology loaded successfully from file: %s", file_path) + else: + logger.warning( + "Ontology file '%s' not found. Skipping this file.", + file_path, + ) + + if not loaded_files: + logger.info("No valid ontology files found. No owl ontology will be attached to the graph.") + self.graph = None + else: + logger.info("Total ontology files loaded: %d", len(loaded_files)) else: - logger.info( - "Ontology file '%s' not found. No owl ontology will be attached to the graph.", - ontology_file, - ) + logger.info("No ontology file provided. No owl ontology will be attached to the graph.") self.graph = None + self.build_lookup() except Exception as e: logger.error("Failed to load ontology", exc_info=e) From 6c576883b910b7f7f08e736a5f406633755a3b1a Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:46:42 +0100 Subject: [PATCH 2/4] Update RDFLibOntologyResolver.py --- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 657ebb667..4f1753432 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -32,7 +32,6 @@ class RDFLibOntologyResolver(BaseOntologyResolver): super().__init__(matching_strategy) self.ontology_file = ontology_file try: - # Convert single file to list for uniform processing files_to_load = [] if ontology_file is not None: if isinstance(ontology_file, str): @@ -42,7 +41,6 @@ class RDFLibOntologyResolver(BaseOntologyResolver): else: raise ValueError(f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}") - # Load ontology files into a single graph if files_to_load: self.graph = Graph() loaded_files = [] From 6d55da00af6577130a217034748942452df537c0 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:47:04 +0100 Subject: [PATCH 3/4] ruff fix --- .../ontology/get_default_ontology_resolver.py | 2 +- .../ontology/rdf_xml/RDFLibOntologyResolver.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py index 6857c339b..7d87c10a6 100644 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -36,7 +36,7 @@ def get_ontology_resolver_from_env( file_paths = [path.strip() for path in ontology_file_path.split(",")] else: file_paths = ontology_file_path - + return RDFLibOntologyResolver( matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths ) diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 4f1753432..45e32936a 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -39,8 +39,10 @@ class RDFLibOntologyResolver(BaseOntologyResolver): elif isinstance(ontology_file, list): files_to_load = ontology_file else: - raise ValueError(f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}") - + raise ValueError( + f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}" + ) + if files_to_load: self.graph = Graph() loaded_files = [] @@ -54,16 +56,20 @@ class RDFLibOntologyResolver(BaseOntologyResolver): "Ontology file '%s' not found. Skipping this file.", file_path, ) - + if not loaded_files: - logger.info("No valid ontology files found. No owl ontology will be attached to the graph.") + logger.info( + "No valid ontology files found. No owl ontology will be attached to the graph." + ) self.graph = None else: logger.info("Total ontology files loaded: %d", len(loaded_files)) else: - logger.info("No ontology file provided. No owl ontology will be attached to the graph.") + logger.info( + "No ontology file provided. No owl ontology will be attached to the graph." + ) self.graph = None - + self.build_lookup() except Exception as e: logger.error("Failed to load ontology", exc_info=e) From d224864e5727c0946990eefbff4309f38582bf73 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 27 Oct 2025 14:22:34 +0100 Subject: [PATCH 4/4] feat: extends ontology unit tests with multifile support tests + partially or fully missing ontology file list tests --- .../modules/ontology/test_ontology_adapter.py | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index dfab79732..811db7bb1 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -489,3 +489,154 @@ def test_get_ontology_resolver_from_env_resolver_functionality(): assert nodes == [] assert relationships == [] assert start_node is None + + +def test_multifile_ontology_loading_success(): + """Test successful loading of multiple ontology files.""" + ns1 = Namespace("http://example.org/cars#") + ns2 = Namespace("http://example.org/tech#") + + g1 = Graph() + g1.add((ns1.Vehicle, RDF.type, OWL.Class)) + g1.add((ns1.Car, RDF.type, OWL.Class)) + g1.add((ns1.Car, RDFS.subClassOf, ns1.Vehicle)) + g1.add((ns1.Audi, RDF.type, ns1.Car)) + g1.add((ns1.BMW, RDF.type, ns1.Car)) + + g2 = Graph() + g2.add((ns2.Company, RDF.type, OWL.Class)) + g2.add((ns2.TechCompany, RDF.type, OWL.Class)) + g2.add((ns2.TechCompany, RDFS.subClassOf, ns2.Company)) + g2.add((ns2.Apple, RDF.type, ns2.TechCompany)) + g2.add((ns2.Google, RDF.type, ns2.TechCompany)) + + import tempfile + + with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f1: + g1.serialize(f1.name, format="xml") + file1_path = f1.name + + with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f2: + g2.serialize(f2.name, format="xml") + file2_path = f2.name + + try: + resolver = RDFLibOntologyResolver(ontology_file=[file1_path, file2_path]) + + assert resolver.graph is not None + + assert "car" in resolver.lookup["classes"] + assert "vehicle" in resolver.lookup["classes"] + assert "company" in resolver.lookup["classes"] + assert "techcompany" in resolver.lookup["classes"] + + assert "audi" in resolver.lookup["individuals"] + assert "bmw" in resolver.lookup["individuals"] + assert "apple" in resolver.lookup["individuals"] + assert "google" in resolver.lookup["individuals"] + + car_match = resolver.find_closest_match("Audi", "individuals") + assert car_match == "audi" + + tech_match = resolver.find_closest_match("Google", "individuals") + assert tech_match == "google" + + finally: + import os + + os.unlink(file1_path) + os.unlink(file2_path) + + +def test_multifile_ontology_with_missing_files(): + """Test loading multiple ontology files where some don't exist.""" + ns = Namespace("http://example.org/test#") + g = Graph() + g.add((ns.Car, RDF.type, OWL.Class)) + g.add((ns.Audi, RDF.type, ns.Car)) + + import tempfile + + with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f: + g.serialize(f.name, format="xml") + valid_file = f.name + + try: + resolver = RDFLibOntologyResolver( + ontology_file=["nonexistent_file_1.owl", valid_file, "nonexistent_file_2.owl"] + ) + + assert resolver.graph is not None + + assert "car" in resolver.lookup["classes"] + assert "audi" in resolver.lookup["individuals"] + + match = resolver.find_closest_match("Audi", "individuals") + assert match == "audi" + + finally: + import os + + os.unlink(valid_file) + + +def test_multifile_ontology_all_files_missing(): + """Test loading multiple ontology files where all files are missing.""" + resolver = RDFLibOntologyResolver( + ontology_file=["nonexistent_file_1.owl", "nonexistent_file_2.owl", "nonexistent_file_3.owl"] + ) + + assert resolver.graph is None + + assert resolver.lookup["classes"] == {} + assert resolver.lookup["individuals"] == {} + + +def test_multifile_ontology_with_overlapping_entities(): + """Test loading multiple ontology files with overlapping/related entities.""" + ns = Namespace("http://example.org/automotive#") + + g1 = Graph() + g1.add((ns.Vehicle, RDF.type, OWL.Class)) + g1.add((ns.Car, RDF.type, OWL.Class)) + g1.add((ns.Car, RDFS.subClassOf, ns.Vehicle)) + + g2 = Graph() + g2.add((ns.LuxuryCar, RDF.type, OWL.Class)) + g2.add((ns.LuxuryCar, RDFS.subClassOf, ns.Car)) + g2.add((ns.Mercedes, RDF.type, ns.LuxuryCar)) + g2.add((ns.BMW, RDF.type, ns.LuxuryCar)) + + import tempfile + + with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f1: + g1.serialize(f1.name, format="xml") + file1_path = f1.name + + with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f2: + g2.serialize(f2.name, format="xml") + file2_path = f2.name + + try: + resolver = RDFLibOntologyResolver(ontology_file=[file1_path, file2_path]) + + assert "vehicle" in resolver.lookup["classes"] + assert "car" in resolver.lookup["classes"] + assert "luxurycar" in resolver.lookup["classes"] + + assert "mercedes" in resolver.lookup["individuals"] + assert "bmw" in resolver.lookup["individuals"] + + nodes, relationships, start_node = resolver.get_subgraph("Mercedes", "individuals") + + uri_labels = {resolver._uri_to_key(n.uri) for n in nodes} + assert "mercedes" in uri_labels + assert "luxurycar" in uri_labels + assert "car" in uri_labels + assert "vehicle" in uri_labels + + finally: + import os + + os.unlink(file1_path) + os.unlink(file2_path)