feat: adds matching strategies and moves resolver
This commit is contained in:
parent
f331cf85fb
commit
93a383b56a
9 changed files with 76 additions and 14 deletions
|
|
@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
|
||||||
from cognee.modules.pipelines import run_pipeline
|
from cognee.modules.pipelines import run_pipeline
|
||||||
from cognee.modules.pipelines.tasks.task import Task
|
from cognee.modules.pipelines.tasks.task import Task
|
||||||
from cognee.modules.chunking.TextChunker import TextChunker
|
from cognee.modules.chunking.TextChunker import TextChunker
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
|
|
||||||
from cognee.tasks.documents import (
|
from cognee.tasks.documents import (
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
|
||||||
from cognee.tasks.graph import extract_graph_from_data
|
from cognee.tasks.graph import extract_graph_from_data
|
||||||
from cognee.tasks.storage import add_data_points
|
from cognee.tasks.storage import add_data_points
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
|
|
||||||
|
|
||||||
async def get_default_tasks_by_indices(
|
async def get_default_tasks_by_indices(
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
|
||||||
generate_node_name,
|
generate_node_name,
|
||||||
)
|
)
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
|
|
||||||
|
|
||||||
def _create_node_key(node_id: str, category: str) -> str:
|
def _create_node_key(node_id: str, category: str) -> str:
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,20 @@ from abc import ABC, abstractmethod
|
||||||
from typing import List, Tuple, Optional
|
from typing import List, Tuple, Optional
|
||||||
|
|
||||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||||
|
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||||
|
|
||||||
|
|
||||||
class BaseOntologyResolver(ABC):
|
class BaseOntologyResolver(ABC):
|
||||||
"""Abstract base class for ontology resolvers."""
|
"""Abstract base class for ontology resolvers."""
|
||||||
|
|
||||||
|
def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
    """Initialize the ontology resolver with a matching strategy.

    Args:
        matching_strategy: The strategy to use for entity matching.
            Defaults to a new FuzzyMatchingStrategy when None is given.
    """
    # Compare against None explicitly instead of relying on truthiness:
    # a caller-supplied strategy that happens to evaluate as falsy (for
    # example one defining __len__ or __bool__) must not be silently
    # replaced by the default.
    if matching_strategy is None:
        matching_strategy = FuzzyMatchingStrategy()
    self.matching_strategy = matching_strategy
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def build_lookup(self) -> None:
|
def build_lookup(self) -> None:
|
||||||
|
|
|
||||||
55
cognee/modules/ontology/matching_strategies.py
Normal file
55
cognee/modules/ontology/matching_strategies.py
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
import difflib
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class MatchingStrategy(ABC):
    """Abstract base class for ontology entity matching strategies.

    Concrete strategies implement ``find_match`` to pick one candidate
    name for a given entity name, or to report that nothing matches.
    """

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the best match for a given name from a list of candidates.

        Args:
            name: The name to match.
            candidates: List of candidate names to match against.

        Returns:
            The best matching candidate name, or None if no match is found.
        """
|
||||||
|
|
||||||
|
|
||||||
|
class FuzzyMatchingStrategy(MatchingStrategy):
    """Fuzzy matching strategy using difflib for approximate string matching."""

    def __init__(self, cutoff: float = 0.8):
        """Initialize fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be
                considered valid. The value is stored as-is and forwarded to
                ``difflib.get_close_matches``, which expects it to lie in
                the range [0, 1].
        """
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the closest fuzzy match for a given name.

        An exact match is preferred; otherwise the single closest candidate
        at or above ``self.cutoff`` is returned.

        Args:
            name: The normalized name to match.
            candidates: List of normalized candidate names.

        Returns:
            The best matching candidate name, or None if ``candidates`` is
            empty or no candidate meets the cutoff.
        """
        if not candidates:
            return None

        # An exact hit needs no similarity scoring, so skip difflib entirely.
        if name in candidates:
            return name

        # n=1 asks difflib for only the single best-scoring candidate.
        best_match = difflib.get_close_matches(
            name, candidates, n=1, cutoff=self.cutoff
        )
        return best_match[0] if best_match else None
|
||||||
|
|
@ -12,6 +12,7 @@ from cognee.modules.ontology.exceptions import (
|
||||||
)
|
)
|
||||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||||
|
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||||
|
|
||||||
logger = get_logger("OntologyAdapter")
|
logger = get_logger("OntologyAdapter")
|
||||||
|
|
||||||
|
|
@ -23,7 +24,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||||
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
|
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, ontology_file: Optional[str] = None):
|
def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None):
|
||||||
|
super().__init__(matching_strategy)
|
||||||
self.ontology_file = ontology_file
|
self.ontology_file = ontology_file
|
||||||
try:
|
try:
|
||||||
if ontology_file and os.path.exists(ontology_file):
|
if ontology_file and os.path.exists(ontology_file):
|
||||||
|
|
@ -94,13 +96,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||||
try:
|
try:
|
||||||
normalized_name = name.lower().replace(" ", "_").strip()
|
normalized_name = name.lower().replace(" ", "_").strip()
|
||||||
possible_matches = list(self.lookup.get(category, {}).keys())
|
possible_matches = list(self.lookup.get(category, {}).keys())
|
||||||
if normalized_name in possible_matches:
|
|
||||||
return normalized_name
|
return self.matching_strategy.find_match(normalized_name, possible_matches)
|
||||||
|
|
||||||
best_match = difflib.get_close_matches(
|
|
||||||
normalized_name, possible_matches, n=1, cutoff=0.8
|
|
||||||
)
|
|
||||||
return best_match[0] if best_match else None
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error in find_closest_match: %s", str(e))
|
logger.error("Error in find_closest_match: %s", str(e))
|
||||||
raise FindClosestMatchError() from e
|
raise FindClosestMatchError() from e
|
||||||
|
|
@ -4,7 +4,7 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.tasks.storage.add_data_points import add_data_points
|
from cognee.tasks.storage.add_data_points import add_data_points
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||||
from cognee.modules.graph.utils import (
|
from cognee.modules.graph.utils import (
|
||||||
expand_with_nodes_and_edges,
|
expand_with_nodes_and_edges,
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ from typing import List
|
||||||
|
|
||||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
|
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
|
||||||
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
|
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
|
||||||
extract_content_nodes_and_relationship_names,
|
extract_content_nodes_and_relationship_names,
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue