feat: adds multifile ontology handling to OntologyResolver

This commit is contained in:
hajdul88 2025-10-27 13:39:02 +01:00
parent 74cf8bd7c7
commit 61e47c0b2f
2 changed files with 41 additions and 11 deletions

View file

@ -21,7 +21,8 @@ def get_ontology_resolver_from_env(
Supported value: "rdflib".
matching_strategy (str): The matching strategy to apply.
Supported value: "fuzzy".
ontology_file_path (str): Path to the ontology file required for the resolver.
ontology_file_path (str): Path to the ontology file(s) required for the resolver.
Can be a single path or comma-separated paths for multiple files.
Returns:
BaseOntologyResolver: An instance of the requested ontology resolver.
@ -31,8 +32,13 @@ def get_ontology_resolver_from_env(
or if required parameters are missing.
"""
if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
if "," in ontology_file_path:
file_paths = [path.strip() for path in ontology_file_path.split(",")]
else:
file_paths = ontology_file_path
return RDFLibOntologyResolver(
matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths
)
else:
raise EnvironmentError(

View file

@ -2,7 +2,7 @@ import os
import difflib
from cognee.shared.logging_utils import get_logger
from collections import deque
from typing import List, Tuple, Dict, Optional, Any
from typing import List, Tuple, Dict, Optional, Any, Union
from rdflib import Graph, URIRef, RDF, RDFS, OWL
from cognee.modules.ontology.exceptions import (
@ -26,22 +26,46 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
def __init__(
self,
ontology_file: Optional[str] = None,
ontology_file: Optional[Union[str, List[str]]] = None,
matching_strategy: Optional[MatchingStrategy] = None,
) -> None:
super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:
if ontology_file and os.path.exists(ontology_file):
# Convert single file to list for uniform processing
files_to_load = []
if ontology_file is not None:
if isinstance(ontology_file, str):
files_to_load = [ontology_file]
elif isinstance(ontology_file, list):
files_to_load = ontology_file
else:
raise ValueError(f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}")
# Load ontology files into a single graph
if files_to_load:
self.graph = Graph()
self.graph.parse(ontology_file)
logger.info("Ontology loaded successfully from file: %s", ontology_file)
loaded_files = []
for file_path in files_to_load:
if os.path.exists(file_path):
self.graph.parse(file_path)
loaded_files.append(file_path)
logger.info("Ontology loaded successfully from file: %s", file_path)
else:
logger.warning(
"Ontology file '%s' not found. Skipping this file.",
file_path,
)
if not loaded_files:
logger.info("No valid ontology files found. No owl ontology will be attached to the graph.")
self.graph = None
else:
logger.info("Total ontology files loaded: %d", len(loaded_files))
else:
logger.info(
"Ontology file '%s' not found. No owl ontology will be attached to the graph.",
ontology_file,
)
logger.info("No ontology file provided. No owl ontology will be attached to the graph.")
self.graph = None
self.build_lookup()
except Exception as e:
logger.error("Failed to load ontology", exc_info=e)