feat: adds matching strategies and moves resolver
This commit is contained in:
parent
f331cf85fb
commit
93a383b56a
9 changed files with 76 additions and 14 deletions
|
|
@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
|
|||
from cognee.modules.pipelines import run_pipeline
|
||||
from cognee.modules.pipelines.tasks.task import Task
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.users.models import User
|
||||
|
||||
from cognee.tasks.documents import (
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
|
|||
from cognee.tasks.graph import extract_graph_from_data
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
|
||||
|
||||
async def get_default_tasks_by_indices(
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
|
|||
generate_node_name,
|
||||
)
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
|
||||
|
||||
def _create_node_key(node_id: str, category: str) -> str:
|
||||
|
|
|
|||
|
|
@ -2,10 +2,20 @@ from abc import ABC, abstractmethod
|
|||
from typing import List, Tuple, Optional
|
||||
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||
|
||||
|
||||
class BaseOntologyResolver(ABC):
|
||||
"""Abstract base class for ontology resolvers."""
|
||||
|
||||
def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
    """Initialize the ontology resolver with a matching strategy.

    Args:
        matching_strategy: The strategy to use for entity matching.
            Defaults to a new FuzzyMatchingStrategy when None is passed.
    """
    # Compare against None explicitly: a strategy object that happens to be
    # falsy (e.g. defines __len__ or __bool__) must not be swapped for the
    # default behind the caller's back.
    if matching_strategy is None:
        matching_strategy = FuzzyMatchingStrategy()
    self.matching_strategy = matching_strategy
|
||||
|
||||
@abstractmethod
|
||||
def build_lookup(self) -> None:
|
||||
|
|
|
|||
55
cognee/modules/ontology/matching_strategies.py
Normal file
55
cognee/modules/ontology/matching_strategies.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
import difflib
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class MatchingStrategy(ABC):
    """Abstract base class for ontology entity matching strategies.

    Concrete strategies implement ``find_match``; the class cannot be
    instantiated until that method is overridden.
    """

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the best match for a given name from a list of candidates.

        Args:
            name: The name to match.
            candidates: List of candidate names to match against.

        Returns:
            The best matching candidate name, or None if no match is found.
        """
|
||||
|
||||
|
||||
class FuzzyMatchingStrategy(MatchingStrategy):
    """Fuzzy matching strategy using difflib for approximate string matching.

    An exact hit in the candidate list is returned as-is; otherwise the
    single closest candidate that reaches ``cutoff`` is returned.
    """

    def __init__(self, cutoff: float = 0.8):
        """Initialize fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be
                considered valid.

        Raises:
            ValueError: If ``cutoff`` is outside the range [0.0, 1.0].
        """
        # difflib.get_close_matches rejects an out-of-range cutoff, but only
        # when a lookup actually reaches the fuzzy path. Validate here so a
        # misconfigured strategy fails at construction instead of on some
        # later, input-dependent call.
        if not 0.0 <= cutoff <= 1.0:
            raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the closest fuzzy match for a given name.

        Args:
            name: The normalized name to match.
            candidates: List of normalized candidate names.

        Returns:
            The best matching candidate name, or None if no match meets the
            cutoff (or the candidate list is empty).
        """
        if not candidates:
            return None

        # An exact hit short-circuits the similarity scan.
        if name in candidates:
            return name

        # n=1: only the single best-scoring candidate is of interest.
        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        return best_match[0] if best_match else None
|
||||
|
|
@ -12,6 +12,7 @@ from cognee.modules.ontology.exceptions import (
|
|||
)
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||
|
||||
logger = get_logger("OntologyAdapter")
|
||||
|
||||
|
|
@ -23,7 +24,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
|||
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
|
||||
"""
|
||||
|
||||
def __init__(self, ontology_file: Optional[str] = None):
|
||||
def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None):
|
||||
super().__init__(matching_strategy)
|
||||
self.ontology_file = ontology_file
|
||||
try:
|
||||
if ontology_file and os.path.exists(ontology_file):
|
||||
|
|
@ -94,13 +96,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
|||
try:
|
||||
normalized_name = name.lower().replace(" ", "_").strip()
|
||||
possible_matches = list(self.lookup.get(category, {}).keys())
|
||||
if normalized_name in possible_matches:
|
||||
return normalized_name
|
||||
|
||||
best_match = difflib.get_close_matches(
|
||||
normalized_name, possible_matches, n=1, cutoff=0.8
|
||||
)
|
||||
return best_match[0] if best_match else None
|
||||
|
||||
return self.matching_strategy.find_match(normalized_name, possible_matches)
|
||||
except Exception as e:
|
||||
logger.error("Error in find_closest_match: %s", str(e))
|
||||
raise FindClosestMatchError() from e
|
||||
|
|
@ -4,7 +4,7 @@ from pydantic import BaseModel
|
|||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.tasks.storage.add_data_points import add_data_points
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.modules.graph.utils import (
|
||||
expand_with_nodes_and_edges,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from typing import List
|
|||
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
|
||||
extract_content_nodes_and_relationship_names,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue