This commit is contained in:
hajdul88 2025-09-17 12:24:39 +02:00
parent 6e47de4979
commit 00c3ba3a0c
3 changed files with 28 additions and 22 deletions

View file

@ -7,12 +7,12 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyM
class BaseOntologyResolver(ABC):
"""Abstract base class for ontology resolvers."""
def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
    """Initialize the ontology resolver with a matching strategy.

    Args:
        matching_strategy: The strategy to use for entity matching.
            Defaults to FuzzyMatchingStrategy if None.
    """
    # `or` falls back to the default for any falsy argument, not only None.
    self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
@ -35,6 +35,8 @@ class BaseOntologyResolver(ABC):
@abstractmethod
def get_subgraph(
    self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[
    List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
]:
    """Get a subgraph for the given node.

    Abstract: concrete resolvers must override this method.

    Args:
        node_name: Name of the node the subgraph is requested for.
        node_type: Category of the node. Defaults to "individuals".
        directed: Defaults to True. Presumably controls whether edge
            direction is respected during traversal -- confirm against
            the concrete implementation.

    Returns:
        A 3-tuple of: a list of AttachedOntologyNode objects, a list of
        (str, str, str) edge tuples, and an optional AttachedOntologyNode.
        NOTE(review): the third element is presumably the node matched
        for ``node_name`` (None when nothing matched) -- confirm.
    """
    pass

View file

@ -5,15 +5,15 @@ from typing import List, Optional
class MatchingStrategy(ABC):
"""Abstract base class for ontology entity matching strategies."""
@abstractmethod
def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
"""Find the best match for a given name from a list of candidates.
Args:
name: The name to match
candidates: List of candidate names to match against
Returns:
The best matching candidate name, or None if no match found
"""
@ -22,34 +22,32 @@ class MatchingStrategy(ABC):
class FuzzyMatchingStrategy(MatchingStrategy):
    """Fuzzy matching strategy using difflib for approximate string matching."""

    def __init__(self, cutoff: float = 0.8):
        """Initialize fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
        """
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the closest fuzzy match for a given name.

        An exact match is returned immediately; otherwise the single best
        candidate reported by ``difflib.get_close_matches`` is used.

        Args:
            name: The normalized name to match
            candidates: List of normalized candidate names

        Returns:
            The best matching candidate name, or None if no match meets the cutoff
        """
        if not candidates:
            return None

        # Check for exact match first
        if name in candidates:
            return name

        # Find fuzzy match (n=1: only the top-scoring candidate is needed)
        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        return best_match[0] if best_match else None

View file

@ -19,12 +19,16 @@ logger = get_logger("OntologyAdapter")
class RDFLibOntologyResolver(BaseOntologyResolver):
"""RDFLib-based ontology resolver implementation.
This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
"""
def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None):
def __init__(
self,
ontology_file: Optional[str] = None,
matching_strategy: Optional[MatchingStrategy] = None,
):
super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:
@ -96,7 +100,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
try:
normalized_name = name.lower().replace(" ", "_").strip()
possible_matches = list(self.lookup.get(category, {}).keys())
return self.matching_strategy.find_match(normalized_name, possible_matches)
except Exception as e:
logger.error("Error in find_closest_match: %s", str(e))
@ -111,7 +115,9 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
) -> Tuple[
List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
]:
nodes_set = set()
edges: List[Tuple[str, str, str]] = []
visited = set()