This commit is contained in:
hajdul88 2025-09-17 12:24:39 +02:00
parent 6e47de4979
commit 00c3ba3a0c
3 changed files with 28 additions and 22 deletions

View file

@ -7,12 +7,12 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyM
class BaseOntologyResolver(ABC): class BaseOntologyResolver(ABC):
"""Abstract base class for ontology resolvers.""" """Abstract base class for ontology resolvers."""
def __init__(self, matching_strategy: Optional[MatchingStrategy] = None): def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
"""Initialize the ontology resolver with a matching strategy. """Initialize the ontology resolver with a matching strategy.
Args: Args:
matching_strategy: The strategy to use for entity matching. matching_strategy: The strategy to use for entity matching.
Defaults to FuzzyMatchingStrategy if None. Defaults to FuzzyMatchingStrategy if None.
""" """
self.matching_strategy = matching_strategy or FuzzyMatchingStrategy() self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
@ -35,6 +35,8 @@ class BaseOntologyResolver(ABC):
@abstractmethod @abstractmethod
def get_subgraph( def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: ) -> Tuple[
List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
]:
"""Get a subgraph for the given node.""" """Get a subgraph for the given node."""
pass pass

View file

@ -5,15 +5,15 @@ from typing import List, Optional
class MatchingStrategy(ABC): class MatchingStrategy(ABC):
"""Abstract base class for ontology entity matching strategies.""" """Abstract base class for ontology entity matching strategies."""
@abstractmethod @abstractmethod
def find_match(self, name: str, candidates: List[str]) -> Optional[str]: def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
"""Find the best match for a given name from a list of candidates. """Find the best match for a given name from a list of candidates.
Args: Args:
name: The name to match name: The name to match
candidates: List of candidate names to match against candidates: List of candidate names to match against
Returns: Returns:
The best matching candidate name, or None if no match found The best matching candidate name, or None if no match found
""" """
@ -22,34 +22,32 @@ class MatchingStrategy(ABC):
class FuzzyMatchingStrategy(MatchingStrategy): class FuzzyMatchingStrategy(MatchingStrategy):
"""Fuzzy matching strategy using difflib for approximate string matching.""" """Fuzzy matching strategy using difflib for approximate string matching."""
def __init__(self, cutoff: float = 0.8): def __init__(self, cutoff: float = 0.8):
"""Initialize fuzzy matching strategy. """Initialize fuzzy matching strategy.
Args: Args:
cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
""" """
self.cutoff = cutoff self.cutoff = cutoff
def find_match(self, name: str, candidates: List[str]) -> Optional[str]: def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
"""Find the closest fuzzy match for a given name. """Find the closest fuzzy match for a given name.
Args: Args:
name: The normalized name to match name: The normalized name to match
candidates: List of normalized candidate names candidates: List of normalized candidate names
Returns: Returns:
The best matching candidate name, or None if no match meets the cutoff The best matching candidate name, or None if no match meets the cutoff
""" """
if not candidates: if not candidates:
return None return None
# Check for exact match first # Check for exact match first
if name in candidates: if name in candidates:
return name return name
# Find fuzzy match # Find fuzzy match
best_match = difflib.get_close_matches( best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
name, candidates, n=1, cutoff=self.cutoff
)
return best_match[0] if best_match else None return best_match[0] if best_match else None

View file

@ -19,12 +19,16 @@ logger = get_logger("OntologyAdapter")
class RDFLibOntologyResolver(BaseOntologyResolver): class RDFLibOntologyResolver(BaseOntologyResolver):
"""RDFLib-based ontology resolver implementation. """RDFLib-based ontology resolver implementation.
This implementation uses RDFLib to parse and work with RDF/OWL ontology files. This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
It provides fuzzy matching and subgraph extraction capabilities for ontology entities. It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
""" """
def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None): def __init__(
self,
ontology_file: Optional[str] = None,
matching_strategy: Optional[MatchingStrategy] = None,
):
super().__init__(matching_strategy) super().__init__(matching_strategy)
self.ontology_file = ontology_file self.ontology_file = ontology_file
try: try:
@ -96,7 +100,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
try: try:
normalized_name = name.lower().replace(" ", "_").strip() normalized_name = name.lower().replace(" ", "_").strip()
possible_matches = list(self.lookup.get(category, {}).keys()) possible_matches = list(self.lookup.get(category, {}).keys())
return self.matching_strategy.find_match(normalized_name, possible_matches) return self.matching_strategy.find_match(normalized_name, possible_matches)
except Exception as e: except Exception as e:
logger.error("Error in find_closest_match: %s", str(e)) logger.error("Error in find_closest_match: %s", str(e))
@ -111,7 +115,9 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
def get_subgraph( def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: ) -> Tuple[
List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
]:
nodes_set = set() nodes_set = set()
edges: List[Tuple[str, str, str]] = [] edges: List[Tuple[str, str, str]] = []
visited = set() visited = set()