feat: adds matching strategies and moves resolver

This commit is contained in:
hajdul88 2025-09-17 12:23:30 +02:00
parent f331cf85fb
commit 93a383b56a
9 changed files with 76 additions and 14 deletions

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.users.models import User
from cognee.tasks.documents import (

View file

@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
async def get_default_tasks_by_indices(

View file

@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
generate_node_name,
)
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
def _create_node_key(node_id: str, category: str) -> str:

View file

@ -2,10 +2,20 @@ from abc import ABC, abstractmethod
from typing import List, Tuple, Optional
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
class BaseOntologyResolver(ABC):
"""Abstract base class for ontology resolvers."""
def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
    """Initialize the ontology resolver with a matching strategy.

    Args:
        matching_strategy: The strategy to use for entity matching.
            Defaults to FuzzyMatchingStrategy if None.
    """
    # Compare against None explicitly instead of relying on truthiness:
    # a caller-supplied strategy that happens to be falsy (for example one
    # defining __len__ or __bool__) must not be silently replaced by the
    # default.
    if matching_strategy is None:
        matching_strategy = FuzzyMatchingStrategy()
    self.matching_strategy = matching_strategy
@abstractmethod
def build_lookup(self) -> None:

View file

@ -0,0 +1,55 @@
import difflib
from abc import ABC, abstractmethod
from typing import List, Optional
class MatchingStrategy(ABC):
    """Interface for strategies that match a name against ontology candidates."""

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Pick the candidate that best matches ``name``.

        Args:
            name: The name to look up.
            candidates: Candidate names to choose from.

        Returns:
            The selected candidate name, or None when nothing matches.
        """
        return None
class FuzzyMatchingStrategy(MatchingStrategy):
    """Fuzzy matching strategy using difflib for approximate string matching."""

    def __init__(self, cutoff: float = 0.8):
        """Initialize fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be
                considered valid.

        Raises:
            ValueError: If ``cutoff`` is outside the range [0.0, 1.0].
        """
        # difflib.get_close_matches rejects an out-of-range cutoff, but only
        # once a fuzzy lookup actually runs (non-empty candidates and no
        # exact hit). Validate here so a bad configuration fails right away
        # instead of on some later, data-dependent call.
        if not 0.0 <= cutoff <= 1.0:
            raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the closest fuzzy match for a given name.

        Args:
            name: The normalized name to match.
            candidates: List of normalized candidate names.

        Returns:
            ``name`` itself when it appears in ``candidates``; otherwise the
            closest candidate reported by difflib for the configured cutoff,
            or None if there are no candidates or none meets the cutoff.
        """
        if not candidates:
            return None

        # An exact hit needs no similarity scoring.
        if name in candidates:
            return name

        # Ask difflib for at most one candidate at or above the cutoff.
        close_matches = difflib.get_close_matches(
            name, candidates, n=1, cutoff=self.cutoff
        )
        return close_matches[0] if close_matches else None

View file

@ -12,6 +12,7 @@ from cognee.modules.ontology.exceptions import (
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
logger = get_logger("OntologyAdapter")
@ -23,7 +24,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
"""
def __init__(self, ontology_file: Optional[str] = None):
def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None):
super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:
if ontology_file and os.path.exists(ontology_file):
@ -94,13 +96,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
try:
normalized_name = name.lower().replace(" ", "_").strip()
possible_matches = list(self.lookup.get(category, {}).keys())
if normalized_name in possible_matches:
return normalized_name
best_match = difflib.get_close_matches(
normalized_name, possible_matches, n=1, cutoff=0.8
)
return best_match[0] if best_match else None
return self.matching_strategy.find_match(normalized_name, possible_matches)
except Exception as e:
logger.error("Error in find_closest_match: %s", str(e))
raise FindClosestMatchError() from e

View file

@ -4,7 +4,7 @@ from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
expand_with_nodes_and_edges,

View file

@ -3,7 +3,7 @@ from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names,

View file

@ -1,6 +1,6 @@
import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode