diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e4f91b44c..a35658691 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_adapter=OntologyResolver(ontology_file=ontology_file_path), + ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path), custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the document chunks. diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index be532232f..677090a58 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver async def get_default_tasks_by_indices( @@ -33,7 +33,7 @@ async def get_no_summary_tasks( # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) - ontology_adapter = OntologyResolver(ontology_file=ontology_file_path) + ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path) graph_task = Task( extract_graph_from_data, diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 125f59e72..3bd62e6e0 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,7 +8,7 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver def _create_node_key(node_id: str, category: str) -> str: @@ -83,7 +83,7 @@ def _process_ontology_edges( def _create_type_node( node_type: str, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -141,7 +141,7 @@ def _create_entity_node( node_name: str, node_description: str, type_node: EntityType, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -198,7 +198,7 @@ def _create_entity_node( def _process_graph_nodes( data_chunk: DocumentChunk, graph: KnowledgeGraph, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -277,7 +277,7 @@ def _process_graph_edges( def expand_with_nodes_and_edges( data_chunks: list[DocumentChunk], chunk_graphs: list[KnowledgeGraph], - ontology_resolver: OntologyResolver = None, + ontology_resolver: RDFLibOntologyResolver = None, existing_edges_map: Optional[dict[str, bool]] = None, ): """ @@ -296,8 +296,8 @@ def expand_with_nodes_and_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) extracted from the chunk content. - ontology_resolver (OntologyResolver, optional): Resolver for validating entities and - types against an ontology. If None, a default OntologyResolver is created. + ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and + types against an ontology. If None, a default RDFLibOntologyResolver is created. Defaults to None. existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}". @@ -320,7 +320,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = OntologyResolver() + ontology_resolver = RDFLibOntologyResolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py new file mode 100644 index 000000000..55826bfb0 --- /dev/null +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod +from typing import List, Tuple, Optional + +from cognee.modules.ontology.models import AttachedOntologyNode + + +class BaseOntologyResolver(ABC): + """Abstract base class for ontology resolvers.""" + + @abstractmethod + def build_lookup(self) -> None: + """Build the lookup dictionary for ontology entities.""" + pass + + @abstractmethod + def refresh_lookup(self) -> None: + """Refresh the lookup dictionary.""" + pass + + @abstractmethod + def find_closest_match(self, name: str, category: str) -> Optional[str]: + """Find the closest match for a given name in the specified category.""" + pass + + @abstractmethod + def get_subgraph( + self, node_name: str, node_type: str = "individuals", directed: bool = True + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + """Get a subgraph for the given node.""" + pass diff --git a/cognee/modules/ontology/models.py b/cognee/modules/ontology/models.py new file mode 100644 index 000000000..eefa9e5dd --- /dev/null +++ b/cognee/modules/ontology/models.py @@ -0,0 +1,20 @@ +from typing import Any + + +class AttachedOntologyNode: + """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" + + def __init__(self, uri: Any, category: str): + self.uri = uri + self.name = self._extract_name(uri) + self.category = category + + @staticmethod + def _extract_name(uri: Any) -> str: + uri_str = str(uri) + if "#" in uri_str: + return uri_str.split("#")[-1] + return uri_str.rstrip("/").split("/")[-1] + + def __repr__(self): + return f"AttachedOntologyNode(name={self.name}, category={self.category})" diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/OntologyResolver.py index 7f3fa004d..3c1a55b5a 100644 --- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/OntologyResolver.py @@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import ( FindClosestMatchError, GetSubgraphError, ) +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode logger = get_logger("OntologyAdapter") -class AttachedOntologyNode: - """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" - - def __init__(self, uri: URIRef, category: str): - self.uri = uri - self.name = self._extract_name(uri) - self.category = category - - @staticmethod - def _extract_name(uri: URIRef) -> str: - uri_str = str(uri) - if "#" in uri_str: - return uri_str.split("#")[-1] - return uri_str.rstrip("/").split("/")[-1] - - def __repr__(self): - return f"AttachedOntologyNode(name={self.name}, category={self.category})" - - -class OntologyResolver: +class RDFLibOntologyResolver(BaseOntologyResolver): + """RDFLib-based ontology resolver implementation. + + This implementation uses RDFLib to parse and work with RDF/OWL ontology files. + It provides fuzzy matching and subgraph extraction capabilities for ontology entities. + """ + def __init__(self, ontology_file: Optional[str] = None): self.ontology_file = ontology_file try: @@ -60,7 +49,7 @@ class OntologyResolver: name = uri_str.rstrip("/").split("/")[-1] return name.lower().replace(" ", "_").strip() - def build_lookup(self): + def build_lookup(self) -> None: try: classes: Dict[str, URIRef] = {} individuals: Dict[str, URIRef] = {} @@ -97,7 +86,7 @@ class OntologyResolver: logger.error("Failed to build lookup dictionary: %s", str(e)) raise RuntimeError("Lookup build failed") from e - def refresh_lookup(self): + def refresh_lookup(self) -> None: self.build_lookup() logger.info("Ontology lookup refreshed.") @@ -125,7 +114,7 @@ class OntologyResolver: def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]: + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: nodes_set = set() edges: List[Tuple[str, str, str]] = [] visited = set() diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index d81516206..2ad32f308 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, @@ -24,7 +24,7 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver, + ontology_adapter: RDFLibOntologyResolver, ) -> List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" @@ -70,7 +70,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index c1f43df5c..ce69f9b0e 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, @@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs async def extract_graph_from_data( data_chunks: List[DocumentChunk], n_rounds: int = 2, - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, ) -> List[DocumentChunk]: """Extract and update graph data from document chunks in multiple steps.""" chunk_nodes = await asyncio.gather( diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 8b406e53a..e0a6f1402 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -1,12 +1,13 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = OntologyResolver() + adapter = RDFLibOntologyResolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success(): def test_ontology_adapter_initialization_file_not_found(): """Test OntologyAdapter initialization with nonexistent file.""" - adapter = OntologyResolver(ontology_file="nonexistent.owl") + adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl") assert adapter.graph is None @@ -27,7 +28,7 @@ def test_build_lookup(): g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -50,7 +51,7 @@ def test_find_closest_match_exact(): g.add((ns.Car, RDF.type, OWL.Class)) g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -92,7 +93,7 @@ def test_find_closest_match_no_match(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib(): g.add((ns.VW, owns, ns.Audi)) g.add((ns.VW, owns, ns.Porsche)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup()