feature: adds multifile ontology support (#1674)
<!-- .github/pull_request_template.md -->

## Description
Adds multifile ontology support.

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
None

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been submitted already
- [x] I have linked any relevant issues in the description
- [x] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
abb1aba517
3 changed files with 194 additions and 9 deletions
|
|
@ -21,7 +21,8 @@ def get_ontology_resolver_from_env(
|
||||||
Supported value: "rdflib".
|
Supported value: "rdflib".
|
||||||
matching_strategy (str): The matching strategy to apply.
|
matching_strategy (str): The matching strategy to apply.
|
||||||
Supported value: "fuzzy".
|
Supported value: "fuzzy".
|
||||||
ontology_file_path (str): Path to the ontology file required for the resolver.
|
ontology_file_path (str): Path to the ontology file(s) required for the resolver.
|
||||||
|
Can be a single path or comma-separated paths for multiple files.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
BaseOntologyResolver: An instance of the requested ontology resolver.
|
BaseOntologyResolver: An instance of the requested ontology resolver.
|
||||||
|
|
@ -31,8 +32,13 @@ def get_ontology_resolver_from_env(
|
||||||
or if required parameters are missing.
|
or if required parameters are missing.
|
||||||
"""
|
"""
|
||||||
if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
|
if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
|
||||||
|
if "," in ontology_file_path:
|
||||||
|
file_paths = [path.strip() for path in ontology_file_path.split(",")]
|
||||||
|
else:
|
||||||
|
file_paths = ontology_file_path
|
||||||
|
|
||||||
return RDFLibOntologyResolver(
|
return RDFLibOntologyResolver(
|
||||||
matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
|
matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise EnvironmentError(
|
raise EnvironmentError(
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ import os
|
||||||
import difflib
|
import difflib
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from typing import List, Tuple, Dict, Optional, Any
|
from typing import List, Tuple, Dict, Optional, Any, Union
|
||||||
from rdflib import Graph, URIRef, RDF, RDFS, OWL
|
from rdflib import Graph, URIRef, RDF, RDFS, OWL
|
||||||
|
|
||||||
from cognee.modules.ontology.exceptions import (
|
from cognee.modules.ontology.exceptions import (
|
||||||
|
|
@ -26,22 +26,50 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
ontology_file: Optional[str] = None,
|
ontology_file: Optional[Union[str, List[str]]] = None,
|
||||||
matching_strategy: Optional[MatchingStrategy] = None,
|
matching_strategy: Optional[MatchingStrategy] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(matching_strategy)
|
super().__init__(matching_strategy)
|
||||||
self.ontology_file = ontology_file
|
self.ontology_file = ontology_file
|
||||||
try:
|
try:
|
||||||
if ontology_file and os.path.exists(ontology_file):
|
files_to_load = []
|
||||||
|
if ontology_file is not None:
|
||||||
|
if isinstance(ontology_file, str):
|
||||||
|
files_to_load = [ontology_file]
|
||||||
|
elif isinstance(ontology_file, list):
|
||||||
|
files_to_load = ontology_file
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if files_to_load:
|
||||||
self.graph = Graph()
|
self.graph = Graph()
|
||||||
self.graph.parse(ontology_file)
|
loaded_files = []
|
||||||
logger.info("Ontology loaded successfully from file: %s", ontology_file)
|
for file_path in files_to_load:
|
||||||
|
if os.path.exists(file_path):
|
||||||
|
self.graph.parse(file_path)
|
||||||
|
loaded_files.append(file_path)
|
||||||
|
logger.info("Ontology loaded successfully from file: %s", file_path)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Ontology file '%s' not found. Skipping this file.",
|
||||||
|
file_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not loaded_files:
|
||||||
|
logger.info(
|
||||||
|
"No valid ontology files found. No owl ontology will be attached to the graph."
|
||||||
|
)
|
||||||
|
self.graph = None
|
||||||
|
else:
|
||||||
|
logger.info("Total ontology files loaded: %d", len(loaded_files))
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Ontology file '%s' not found. No owl ontology will be attached to the graph.",
|
"No ontology file provided. No owl ontology will be attached to the graph."
|
||||||
ontology_file,
|
|
||||||
)
|
)
|
||||||
self.graph = None
|
self.graph = None
|
||||||
|
|
||||||
self.build_lookup()
|
self.build_lookup()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to load ontology", exc_info=e)
|
logger.error("Failed to load ontology", exc_info=e)
|
||||||
|
|
|
||||||
|
|
@ -489,3 +489,154 @@ def test_get_ontology_resolver_from_env_resolver_functionality():
|
||||||
assert nodes == []
|
assert nodes == []
|
||||||
assert relationships == []
|
assert relationships == []
|
||||||
assert start_node is None
|
assert start_node is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_loading_success():
    """Test successful loading of multiple ontology files.

    Builds two disjoint ontologies (cars and tech companies), writes each to
    its own RDF/XML file, and checks that a resolver given both paths exposes
    the classes and individuals of both files. The assertions expect the
    lookup keys to be the lowercase entity names.
    """
    import os
    import tempfile

    ns1 = Namespace("http://example.org/cars#")
    ns2 = Namespace("http://example.org/tech#")

    g1 = Graph()
    g1.add((ns1.Vehicle, RDF.type, OWL.Class))
    g1.add((ns1.Car, RDF.type, OWL.Class))
    g1.add((ns1.Car, RDFS.subClassOf, ns1.Vehicle))
    g1.add((ns1.Audi, RDF.type, ns1.Car))
    g1.add((ns1.BMW, RDF.type, ns1.Car))

    g2 = Graph()
    g2.add((ns2.Company, RDF.type, OWL.Class))
    g2.add((ns2.TechCompany, RDF.type, OWL.Class))
    g2.add((ns2.TechCompany, RDFS.subClassOf, ns2.Company))
    g2.add((ns2.Apple, RDF.type, ns2.TechCompany))
    g2.add((ns2.Google, RDF.type, ns2.TechCompany))

    # The directory context manager owns cleanup: both files are removed even
    # when serialization or an assertion fails part-way through, so nothing
    # is left behind in the system temp location.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file1_path = os.path.join(tmp_dir, "cars.owl")
        file2_path = os.path.join(tmp_dir, "tech.owl")
        g1.serialize(file1_path, format="xml")
        g2.serialize(file2_path, format="xml")

        resolver = RDFLibOntologyResolver(ontology_file=[file1_path, file2_path])

        assert resolver.graph is not None

        # Classes from both files must be present.
        assert "car" in resolver.lookup["classes"]
        assert "vehicle" in resolver.lookup["classes"]
        assert "company" in resolver.lookup["classes"]
        assert "techcompany" in resolver.lookup["classes"]

        # Individuals from both files must be present.
        assert "audi" in resolver.lookup["individuals"]
        assert "bmw" in resolver.lookup["individuals"]
        assert "apple" in resolver.lookup["individuals"]
        assert "google" in resolver.lookup["individuals"]

        car_match = resolver.find_closest_match("Audi", "individuals")
        assert car_match == "audi"

        tech_match = resolver.find_closest_match("Google", "individuals")
        assert tech_match == "google"
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_with_missing_files():
    """Test loading multiple ontology files where some don't exist.

    One valid RDF/XML file is listed between two paths that do not exist.
    The resolver is expected to skip the missing paths and still build its
    graph and lookup from the valid file.
    """
    import os
    import tempfile

    ns = Namespace("http://example.org/test#")
    g = Graph()
    g.add((ns.Car, RDF.type, OWL.Class))
    g.add((ns.Audi, RDF.type, ns.Car))

    # Everything lives in a fresh temporary directory: the valid file is
    # cleaned up automatically, and the "missing" paths are guaranteed not to
    # exist regardless of the current working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        valid_file = os.path.join(tmp_dir, "valid.owl")
        g.serialize(valid_file, format="xml")

        missing_before = os.path.join(tmp_dir, "nonexistent_file_1.owl")
        missing_after = os.path.join(tmp_dir, "nonexistent_file_2.owl")

        resolver = RDFLibOntologyResolver(
            ontology_file=[missing_before, valid_file, missing_after]
        )

        assert resolver.graph is not None

        assert "car" in resolver.lookup["classes"]
        assert "audi" in resolver.lookup["individuals"]

        match = resolver.find_closest_match("Audi", "individuals")
        assert match == "audi"
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_all_files_missing():
    """Test loading multiple ontology files where all files are missing.

    With no loadable file the resolver is expected to leave ``graph`` unset
    and to expose empty ``classes`` and ``individuals`` lookups.
    """
    missing_paths = [
        "nonexistent_file_1.owl",
        "nonexistent_file_2.owl",
        "nonexistent_file_3.owl",
    ]
    resolver = RDFLibOntologyResolver(ontology_file=missing_paths)

    assert resolver.graph is None

    for category in ("classes", "individuals"):
        assert resolver.lookup[category] == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_with_overlapping_entities():
    """Test loading multiple ontology files with overlapping/related entities.

    Both files share one namespace: the first declares ``Vehicle`` and
    ``Car``, the second declares ``LuxuryCar`` as a subclass of ``Car`` plus
    two individuals. After loading both, the subgraph for ``Mercedes`` is
    expected to include the chain Mercedes -> LuxuryCar -> Car -> Vehicle,
    which crosses the file boundary.
    """
    import os
    import tempfile

    ns = Namespace("http://example.org/automotive#")

    g1 = Graph()
    g1.add((ns.Vehicle, RDF.type, OWL.Class))
    g1.add((ns.Car, RDF.type, OWL.Class))
    g1.add((ns.Car, RDFS.subClassOf, ns.Vehicle))

    g2 = Graph()
    g2.add((ns.LuxuryCar, RDF.type, OWL.Class))
    g2.add((ns.LuxuryCar, RDFS.subClassOf, ns.Car))
    g2.add((ns.Mercedes, RDF.type, ns.LuxuryCar))
    g2.add((ns.BMW, RDF.type, ns.LuxuryCar))

    # The directory context manager owns cleanup: both files are removed even
    # when serialization or an assertion fails part-way through.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file1_path = os.path.join(tmp_dir, "base.owl")
        file2_path = os.path.join(tmp_dir, "luxury.owl")
        g1.serialize(file1_path, format="xml")
        g2.serialize(file2_path, format="xml")

        resolver = RDFLibOntologyResolver(ontology_file=[file1_path, file2_path])

        assert "vehicle" in resolver.lookup["classes"]
        assert "car" in resolver.lookup["classes"]
        assert "luxurycar" in resolver.lookup["classes"]

        assert "mercedes" in resolver.lookup["individuals"]
        assert "bmw" in resolver.lookup["individuals"]

        nodes, relationships, start_node = resolver.get_subgraph("Mercedes", "individuals")

        # Compare by lookup key so the check does not depend on full URIs.
        uri_labels = {resolver._uri_to_key(n.uri) for n in nodes}
        assert "mercedes" in uri_labels
        assert "luxurycar" in uri_labels
        assert "car" in uri_labels
        assert "vehicle" in uri_labels
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue