feat: adds base class + renames rdflib implementation

This commit is contained in:
hajdul88 2025-09-17 12:02:38 +02:00
parent fc49725126
commit f651991c86
9 changed files with 90 additions and 50 deletions

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.tasks.documents import ( from cognee.tasks.documents import (
@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task( Task(
extract_graph_from_data, extract_graph_from_data,
graph_model=graph_model, graph_model=graph_model,
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path), ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path),
custom_prompt=custom_prompt, custom_prompt=custom_prompt,
task_config={"batch_size": 10}, task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks. ), # Generate knowledge graphs from the document chunks.

View file

@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
async def get_default_tasks_by_indices( async def get_default_tasks_by_indices(
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
ontology_adapter = OntologyResolver(ontology_file=ontology_file_path) ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
graph_task = Task( graph_task = Task(
extract_graph_from_data, extract_graph_from_data,

View file

@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
generate_node_name, generate_node_name,
) )
from cognee.shared.data_models import KnowledgeGraph from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
def _create_node_key(node_id: str, category: str) -> str: def _create_node_key(node_id: str, category: str) -> str:
@ -83,7 +83,7 @@ def _process_ontology_edges(
def _create_type_node( def _create_type_node(
node_type: str, node_type: str,
ontology_resolver: OntologyResolver, ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict, added_nodes_map: dict,
added_ontology_nodes_map: dict, added_ontology_nodes_map: dict,
name_mapping: dict, name_mapping: dict,
@ -141,7 +141,7 @@ def _create_entity_node(
node_name: str, node_name: str,
node_description: str, node_description: str,
type_node: EntityType, type_node: EntityType,
ontology_resolver: OntologyResolver, ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict, added_nodes_map: dict,
added_ontology_nodes_map: dict, added_ontology_nodes_map: dict,
name_mapping: dict, name_mapping: dict,
@ -198,7 +198,7 @@ def _create_entity_node(
def _process_graph_nodes( def _process_graph_nodes(
data_chunk: DocumentChunk, data_chunk: DocumentChunk,
graph: KnowledgeGraph, graph: KnowledgeGraph,
ontology_resolver: OntologyResolver, ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict, added_nodes_map: dict,
added_ontology_nodes_map: dict, added_ontology_nodes_map: dict,
name_mapping: dict, name_mapping: dict,
@ -277,7 +277,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges( def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk], data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph], chunk_graphs: list[KnowledgeGraph],
ontology_resolver: OntologyResolver = None, ontology_resolver: RDFLibOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None, existing_edges_map: Optional[dict[str, bool]] = None,
): ):
""" """
@ -296,8 +296,8 @@ def expand_with_nodes_and_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
from the chunk content. from the chunk content.
ontology_resolver (OntologyResolver, optional): Resolver for validating entities and ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and
types against an ontology. If None, a default OntologyResolver is created. types against an ontology. If None, a default RDFLibOntologyResolver is created.
Defaults to None. Defaults to None.
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}". duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@ -320,7 +320,7 @@ def expand_with_nodes_and_edges(
existing_edges_map = {} existing_edges_map = {}
if ontology_resolver is None: if ontology_resolver is None:
ontology_resolver = OntologyResolver() ontology_resolver = RDFLibOntologyResolver()
added_nodes_map = {} added_nodes_map = {}
added_ontology_nodes_map = {} added_ontology_nodes_map = {}

View file

@@ -0,0 +1,30 @@
from abc import ABC, abstractmethod
from typing import List, Tuple, Optional
from cognee.modules.ontology.models import AttachedOntologyNode
class BaseOntologyResolver(ABC):
    """Contract shared by every ontology resolver backend.

    Concrete subclasses (for example an RDFLib-backed resolver) must provide
    lookup construction, name matching, and subgraph extraction over an
    ontology source.
    """

    @abstractmethod
    def build_lookup(self) -> None:
        """Populate the internal lookup table of ontology entities."""
        ...

    @abstractmethod
    def refresh_lookup(self) -> None:
        """Rebuild the lookup table, discarding any previously built state."""
        ...

    @abstractmethod
    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        """Return the best-matching entity name within *category*, or None if absent."""
        ...

    @abstractmethod
    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode],
        List[Tuple[str, str, str]],
        Optional[AttachedOntologyNode],
    ]:
        """Return (nodes, edges, matched_node) describing the subgraph around *node_name*."""
        ...

View file

@@ -0,0 +1,20 @@
from typing import Any
class AttachedOntologyNode:
    """Backend-agnostic view of a single ontology entity.

    Wraps the raw URI object produced by whichever ontology library is in
    use, exposing a short human-readable name plus the entity's category.
    """

    def __init__(self, uri: Any, category: str):
        # Keep the raw URI around; the display name is derived once up front.
        self.uri = uri
        self.name = self._extract_name(uri)
        self.category = category

    @staticmethod
    def _extract_name(uri: Any) -> str:
        """Derive a short name: the '#' fragment if present, else the last path segment."""
        text = str(uri)
        if "#" in text:
            return text.rpartition("#")[2]
        trimmed = text.rstrip("/")
        return trimmed.rpartition("/")[2]

    def __repr__(self):
        return f"AttachedOntologyNode(name={self.name}, category={self.category})"

View file

@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import (
FindClosestMatchError, FindClosestMatchError,
GetSubgraphError, GetSubgraphError,
) )
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
logger = get_logger("OntologyAdapter") logger = get_logger("OntologyAdapter")
class AttachedOntologyNode: class RDFLibOntologyResolver(BaseOntologyResolver):
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" """RDFLib-based ontology resolver implementation.
def __init__(self, uri: URIRef, category: str): This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
self.uri = uri It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
self.name = self._extract_name(uri) """
self.category = category
@staticmethod
def _extract_name(uri: URIRef) -> str:
uri_str = str(uri)
if "#" in uri_str:
return uri_str.split("#")[-1]
return uri_str.rstrip("/").split("/")[-1]
def __repr__(self):
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
class OntologyResolver:
def __init__(self, ontology_file: Optional[str] = None): def __init__(self, ontology_file: Optional[str] = None):
self.ontology_file = ontology_file self.ontology_file = ontology_file
try: try:
@ -60,7 +49,7 @@ class OntologyResolver:
name = uri_str.rstrip("/").split("/")[-1] name = uri_str.rstrip("/").split("/")[-1]
return name.lower().replace(" ", "_").strip() return name.lower().replace(" ", "_").strip()
def build_lookup(self): def build_lookup(self) -> None:
try: try:
classes: Dict[str, URIRef] = {} classes: Dict[str, URIRef] = {}
individuals: Dict[str, URIRef] = {} individuals: Dict[str, URIRef] = {}
@ -97,7 +86,7 @@ class OntologyResolver:
logger.error("Failed to build lookup dictionary: %s", str(e)) logger.error("Failed to build lookup dictionary: %s", str(e))
raise RuntimeError("Lookup build failed") from e raise RuntimeError("Lookup build failed") from e
def refresh_lookup(self): def refresh_lookup(self) -> None:
self.build_lookup() self.build_lookup()
logger.info("Ontology lookup refreshed.") logger.info("Ontology lookup refreshed.")
@ -125,7 +114,7 @@ class OntologyResolver:
def get_subgraph( def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]: ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
nodes_set = set() nodes_set = set()
edges: List[Tuple[str, str, str]] = [] edges: List[Tuple[str, str, str]] = []
visited = set() visited = set()

View file

@ -4,7 +4,7 @@ from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.storage.add_data_points import add_data_points from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import ( from cognee.modules.graph.utils import (
expand_with_nodes_and_edges, expand_with_nodes_and_edges,
@ -24,7 +24,7 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk], data_chunks: list[DocumentChunk],
chunk_graphs: list, chunk_graphs: list,
graph_model: Type[BaseModel], graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver, ontology_adapter: RDFLibOntologyResolver,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases.""" """Updates DocumentChunk objects, integrates data points and edges into databases."""
@ -70,7 +70,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data( async def extract_graph_from_data(
data_chunks: List[DocumentChunk], data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel], graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver = None, ontology_adapter: RDFLibOntologyResolver = None,
custom_prompt: Optional[str] = None, custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
""" """

View file

@ -3,7 +3,7 @@ from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names, extract_content_nodes_and_relationship_names,
@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
async def extract_graph_from_data( async def extract_graph_from_data(
data_chunks: List[DocumentChunk], data_chunks: List[DocumentChunk],
n_rounds: int = 2, n_rounds: int = 2,
ontology_adapter: OntologyResolver = None, ontology_adapter: RDFLibOntologyResolver = None,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
"""Extract and update graph data from document chunks in multiple steps.""" """Extract and update graph data from document chunks in multiple steps."""
chunk_nodes = await asyncio.gather( chunk_nodes = await asyncio.gather(

View file

@ -1,12 +1,13 @@
import pytest import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
def test_ontology_adapter_initialization_success(): def test_ontology_adapter_initialization_success():
"""Test successful initialization of OntologyAdapter.""" """Test successful initialization of OntologyAdapter."""
adapter = OntologyResolver() adapter = RDFLibOntologyResolver()
adapter.build_lookup() adapter.build_lookup()
assert isinstance(adapter.lookup, dict) assert isinstance(adapter.lookup, dict)
@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success():
def test_ontology_adapter_initialization_file_not_found(): def test_ontology_adapter_initialization_file_not_found():
"""Test OntologyAdapter initialization with nonexistent file.""" """Test OntologyAdapter initialization with nonexistent file."""
adapter = OntologyResolver(ontology_file="nonexistent.owl") adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
assert adapter.graph is None assert adapter.graph is None
@ -27,7 +28,7 @@ def test_build_lookup():
g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -50,7 +51,7 @@ def test_find_closest_match_exact():
g.add((ns.Car, RDF.type, OWL.Class)) g.add((ns.Car, RDF.type, OWL.Class))
g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy():
g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -92,7 +93,7 @@ def test_find_closest_match_no_match():
g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib():
"""Test get_subgraph returns empty results for a non-existent node.""" """Test get_subgraph returns empty results for a non-existent node."""
g = Graph() g = Graph()
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib():
g.add((ns.VW, owns, ns.Audi)) g.add((ns.VW, owns, ns.Audi))
g.add((ns.VW, owns, ns.Porsche)) g.add((ns.VW, owns, ns.Porsche))
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib():
"""Test that refresh_lookup rebuilds the lookup dict into a new object.""" """Test that refresh_lookup rebuilds the lookup dict into a new object."""
g = Graph() g = Graph()
resolver = OntologyResolver() resolver = RDFLibOntologyResolver()
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()