feat: adds base class + renames rdflib implementation

This commit is contained in:
hajdul88 2025-09-17 12:02:38 +02:00
parent fc49725126
commit f651991c86
9 changed files with 90 additions and 50 deletions

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.users.models import User
from cognee.tasks.documents import (
@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task(
extract_graph_from_data,
graph_model=graph_model,
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path),
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks.

View file

@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
async def get_default_tasks_by_indices(
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
graph_task = Task(
extract_graph_from_data,

View file

@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
generate_node_name,
)
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
def _create_node_key(node_id: str, category: str) -> str:
@ -83,7 +83,7 @@ def _process_ontology_edges(
def _create_type_node(
node_type: str,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -141,7 +141,7 @@ def _create_entity_node(
node_name: str,
node_description: str,
type_node: EntityType,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -198,7 +198,7 @@ def _create_entity_node(
def _process_graph_nodes(
data_chunk: DocumentChunk,
graph: KnowledgeGraph,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -277,7 +277,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph],
ontology_resolver: OntologyResolver = None,
ontology_resolver: RDFLibOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None,
):
"""
@ -296,8 +296,8 @@ def expand_with_nodes_and_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
from the chunk content.
ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
types against an ontology. If None, a default OntologyResolver is created.
ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and
types against an ontology. If None, a default RDFLibOntologyResolver is created.
Defaults to None.
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@ -320,7 +320,7 @@ def expand_with_nodes_and_edges(
existing_edges_map = {}
if ontology_resolver is None:
ontology_resolver = OntologyResolver()
ontology_resolver = RDFLibOntologyResolver()
added_nodes_map = {}
added_ontology_nodes_map = {}

View file

@ -0,0 +1,30 @@
from abc import ABC, abstractmethod
from typing import List, Tuple, Optional
from cognee.modules.ontology.models import AttachedOntologyNode
class BaseOntologyResolver(ABC):
"""Abstract base class for ontology resolvers."""
@abstractmethod
def build_lookup(self) -> None:
"""Build the lookup dictionary for ontology entities."""
pass
@abstractmethod
def refresh_lookup(self) -> None:
"""Refresh the lookup dictionary."""
pass
@abstractmethod
def find_closest_match(self, name: str, category: str) -> Optional[str]:
"""Find the closest match for a given name in the specified category."""
pass
@abstractmethod
def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
"""Get a subgraph for the given node."""
pass

View file

@ -0,0 +1,20 @@
from typing import Any
class AttachedOntologyNode:
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
def __init__(self, uri: Any, category: str):
self.uri = uri
self.name = self._extract_name(uri)
self.category = category
@staticmethod
def _extract_name(uri: Any) -> str:
uri_str = str(uri)
if "#" in uri_str:
return uri_str.split("#")[-1]
return uri_str.rstrip("/").split("/")[-1]
def __repr__(self):
return f"AttachedOntologyNode(name={self.name}, category={self.category})"

View file

@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import (
FindClosestMatchError,
GetSubgraphError,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
logger = get_logger("OntologyAdapter")
class AttachedOntologyNode:
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
def __init__(self, uri: URIRef, category: str):
self.uri = uri
self.name = self._extract_name(uri)
self.category = category
@staticmethod
def _extract_name(uri: URIRef) -> str:
uri_str = str(uri)
if "#" in uri_str:
return uri_str.split("#")[-1]
return uri_str.rstrip("/").split("/")[-1]
def __repr__(self):
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
class OntologyResolver:
class RDFLibOntologyResolver(BaseOntologyResolver):
"""RDFLib-based ontology resolver implementation.
This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
"""
def __init__(self, ontology_file: Optional[str] = None):
self.ontology_file = ontology_file
try:
@ -60,7 +49,7 @@ class OntologyResolver:
name = uri_str.rstrip("/").split("/")[-1]
return name.lower().replace(" ", "_").strip()
def build_lookup(self):
def build_lookup(self) -> None:
try:
classes: Dict[str, URIRef] = {}
individuals: Dict[str, URIRef] = {}
@ -97,7 +86,7 @@ class OntologyResolver:
logger.error("Failed to build lookup dictionary: %s", str(e))
raise RuntimeError("Lookup build failed") from e
def refresh_lookup(self):
def refresh_lookup(self) -> None:
self.build_lookup()
logger.info("Ontology lookup refreshed.")
@ -125,7 +114,7 @@ class OntologyResolver:
def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
nodes_set = set()
edges: List[Tuple[str, str, str]] = []
visited = set()

View file

@ -4,7 +4,7 @@ from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
expand_with_nodes_and_edges,
@ -24,7 +24,7 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk],
chunk_graphs: list,
graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver,
ontology_adapter: RDFLibOntologyResolver,
) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
@ -70,7 +70,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver = None,
ontology_adapter: RDFLibOntologyResolver = None,
custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]:
"""

View file

@ -3,7 +3,7 @@ from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names,
@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
n_rounds: int = 2,
ontology_adapter: OntologyResolver = None,
ontology_adapter: RDFLibOntologyResolver = None,
) -> List[DocumentChunk]:
"""Extract and update graph data from document chunks in multiple steps."""
chunk_nodes = await asyncio.gather(

View file

@ -1,12 +1,13 @@
import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
def test_ontology_adapter_initialization_success():
"""Test successful initialization of OntologyAdapter."""
adapter = OntologyResolver()
adapter = RDFLibOntologyResolver()
adapter.build_lookup()
assert isinstance(adapter.lookup, dict)
@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success():
def test_ontology_adapter_initialization_file_not_found():
"""Test OntologyAdapter initialization with nonexistent file."""
adapter = OntologyResolver(ontology_file="nonexistent.owl")
adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
assert adapter.graph is None
@ -27,7 +28,7 @@ def test_build_lookup():
g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -50,7 +51,7 @@ def test_find_closest_match_exact():
g.add((ns.Car, RDF.type, OWL.Class))
g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -92,7 +93,7 @@ def test_find_closest_match_no_match():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib():
"""Test get_subgraph returns empty results for a non-existent node."""
g = Graph()
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib():
g.add((ns.VW, owns, ns.Audi))
g.add((ns.VW, owns, ns.Porsche))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib():
"""Test that refresh_lookup rebuilds the lookup dict into a new object."""
g = Graph()
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()