feat: adds base class + renames rdflib implementation
This commit is contained in:
parent
fc49725126
commit
f651991c86
9 changed files with 90 additions and 50 deletions
|
|
@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
|
|||
from cognee.modules.pipelines import run_pipeline
|
||||
from cognee.modules.pipelines.tasks.task import Task
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.users.models import User
|
||||
|
||||
from cognee.tasks.documents import (
|
||||
|
|
@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|||
Task(
|
||||
extract_graph_from_data,
|
||||
graph_model=graph_model,
|
||||
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
|
||||
ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path),
|
||||
custom_prompt=custom_prompt,
|
||||
task_config={"batch_size": 10},
|
||||
), # Generate knowledge graphs from the document chunks.
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
|
|||
from cognee.tasks.graph import extract_graph_from_data
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
|
||||
|
||||
async def get_default_tasks_by_indices(
|
||||
|
|
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
|
|||
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
|
||||
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
|
||||
|
||||
ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
|
||||
ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
|
||||
|
||||
graph_task = Task(
|
||||
extract_graph_from_data,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from cognee.modules.engine.utils import (
|
|||
generate_node_name,
|
||||
)
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
|
||||
|
||||
def _create_node_key(node_id: str, category: str) -> str:
|
||||
|
|
@ -83,7 +83,7 @@ def _process_ontology_edges(
|
|||
|
||||
def _create_type_node(
|
||||
node_type: str,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -141,7 +141,7 @@ def _create_entity_node(
|
|||
node_name: str,
|
||||
node_description: str,
|
||||
type_node: EntityType,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -198,7 +198,7 @@ def _create_entity_node(
|
|||
def _process_graph_nodes(
|
||||
data_chunk: DocumentChunk,
|
||||
graph: KnowledgeGraph,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -277,7 +277,7 @@ def _process_graph_edges(
|
|||
def expand_with_nodes_and_edges(
|
||||
data_chunks: list[DocumentChunk],
|
||||
chunk_graphs: list[KnowledgeGraph],
|
||||
ontology_resolver: OntologyResolver = None,
|
||||
ontology_resolver: RDFLibOntologyResolver = None,
|
||||
existing_edges_map: Optional[dict[str, bool]] = None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -296,8 +296,8 @@ def expand_with_nodes_and_edges(
|
|||
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
|
||||
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
|
||||
from the chunk content.
|
||||
ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
|
||||
types against an ontology. If None, a default OntologyResolver is created.
|
||||
ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and
|
||||
types against an ontology. If None, a default RDFLibOntologyResolver is created.
|
||||
Defaults to None.
|
||||
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
|
||||
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
|
||||
|
|
@ -320,7 +320,7 @@ def expand_with_nodes_and_edges(
|
|||
existing_edges_map = {}
|
||||
|
||||
if ontology_resolver is None:
|
||||
ontology_resolver = OntologyResolver()
|
||||
ontology_resolver = RDFLibOntologyResolver()
|
||||
|
||||
added_nodes_map = {}
|
||||
added_ontology_nodes_map = {}
|
||||
|
|
|
|||
30
cognee/modules/ontology/base_ontology_resolver.py
Normal file
30
cognee/modules/ontology/base_ontology_resolver.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
|
||||
|
||||
class BaseOntologyResolver(ABC):
|
||||
"""Abstract base class for ontology resolvers."""
|
||||
|
||||
@abstractmethod
|
||||
def build_lookup(self) -> None:
|
||||
"""Build the lookup dictionary for ontology entities."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def refresh_lookup(self) -> None:
|
||||
"""Refresh the lookup dictionary."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def find_closest_match(self, name: str, category: str) -> Optional[str]:
|
||||
"""Find the closest match for a given name in the specified category."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_subgraph(
|
||||
self, node_name: str, node_type: str = "individuals", directed: bool = True
|
||||
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
|
||||
"""Get a subgraph for the given node."""
|
||||
pass
|
||||
20
cognee/modules/ontology/models.py
Normal file
20
cognee/modules/ontology/models.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from typing import Any
|
||||
|
||||
|
||||
class AttachedOntologyNode:
|
||||
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
|
||||
|
||||
def __init__(self, uri: Any, category: str):
|
||||
self.uri = uri
|
||||
self.name = self._extract_name(uri)
|
||||
self.category = category
|
||||
|
||||
@staticmethod
|
||||
def _extract_name(uri: Any) -> str:
|
||||
uri_str = str(uri)
|
||||
if "#" in uri_str:
|
||||
return uri_str.split("#")[-1]
|
||||
return uri_str.rstrip("/").split("/")[-1]
|
||||
|
||||
def __repr__(self):
|
||||
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
|
||||
|
|
@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import (
|
|||
FindClosestMatchError,
|
||||
GetSubgraphError,
|
||||
)
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
|
||||
logger = get_logger("OntologyAdapter")
|
||||
|
||||
|
||||
class AttachedOntologyNode:
|
||||
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
|
||||
|
||||
def __init__(self, uri: URIRef, category: str):
|
||||
self.uri = uri
|
||||
self.name = self._extract_name(uri)
|
||||
self.category = category
|
||||
|
||||
@staticmethod
|
||||
def _extract_name(uri: URIRef) -> str:
|
||||
uri_str = str(uri)
|
||||
if "#" in uri_str:
|
||||
return uri_str.split("#")[-1]
|
||||
return uri_str.rstrip("/").split("/")[-1]
|
||||
|
||||
def __repr__(self):
|
||||
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
|
||||
|
||||
|
||||
class OntologyResolver:
|
||||
class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||
"""RDFLib-based ontology resolver implementation.
|
||||
|
||||
This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
|
||||
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
|
||||
"""
|
||||
|
||||
def __init__(self, ontology_file: Optional[str] = None):
|
||||
self.ontology_file = ontology_file
|
||||
try:
|
||||
|
|
@ -60,7 +49,7 @@ class OntologyResolver:
|
|||
name = uri_str.rstrip("/").split("/")[-1]
|
||||
return name.lower().replace(" ", "_").strip()
|
||||
|
||||
def build_lookup(self):
|
||||
def build_lookup(self) -> None:
|
||||
try:
|
||||
classes: Dict[str, URIRef] = {}
|
||||
individuals: Dict[str, URIRef] = {}
|
||||
|
|
@ -97,7 +86,7 @@ class OntologyResolver:
|
|||
logger.error("Failed to build lookup dictionary: %s", str(e))
|
||||
raise RuntimeError("Lookup build failed") from e
|
||||
|
||||
def refresh_lookup(self):
|
||||
def refresh_lookup(self) -> None:
|
||||
self.build_lookup()
|
||||
logger.info("Ontology lookup refreshed.")
|
||||
|
||||
|
|
@ -125,7 +114,7 @@ class OntologyResolver:
|
|||
|
||||
def get_subgraph(
|
||||
self, node_name: str, node_type: str = "individuals", directed: bool = True
|
||||
) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
|
||||
) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
|
||||
nodes_set = set()
|
||||
edges: List[Tuple[str, str, str]] = []
|
||||
visited = set()
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from pydantic import BaseModel
|
|||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.tasks.storage.add_data_points import add_data_points
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.modules.graph.utils import (
|
||||
expand_with_nodes_and_edges,
|
||||
|
|
@ -24,7 +24,7 @@ async def integrate_chunk_graphs(
|
|||
data_chunks: list[DocumentChunk],
|
||||
chunk_graphs: list,
|
||||
graph_model: Type[BaseModel],
|
||||
ontology_adapter: OntologyResolver,
|
||||
ontology_adapter: RDFLibOntologyResolver,
|
||||
) -> List[DocumentChunk]:
|
||||
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ async def integrate_chunk_graphs(
|
|||
async def extract_graph_from_data(
|
||||
data_chunks: List[DocumentChunk],
|
||||
graph_model: Type[BaseModel],
|
||||
ontology_adapter: OntologyResolver = None,
|
||||
ontology_adapter: RDFLibOntologyResolver = None,
|
||||
custom_prompt: Optional[str] = None,
|
||||
) -> List[DocumentChunk]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from typing import List
|
|||
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
|
||||
extract_content_nodes_and_relationship_names,
|
||||
|
|
@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
|
|||
async def extract_graph_from_data(
|
||||
data_chunks: List[DocumentChunk],
|
||||
n_rounds: int = 2,
|
||||
ontology_adapter: OntologyResolver = None,
|
||||
ontology_adapter: RDFLibOntologyResolver = None,
|
||||
) -> List[DocumentChunk]:
|
||||
"""Extract and update graph data from document chunks in multiple steps."""
|
||||
chunk_nodes = await asyncio.gather(
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
import pytest
|
||||
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
|
||||
|
||||
def test_ontology_adapter_initialization_success():
|
||||
"""Test successful initialization of OntologyAdapter."""
|
||||
|
||||
adapter = OntologyResolver()
|
||||
adapter = RDFLibOntologyResolver()
|
||||
adapter.build_lookup()
|
||||
|
||||
assert isinstance(adapter.lookup, dict)
|
||||
|
|
@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success():
|
|||
|
||||
def test_ontology_adapter_initialization_file_not_found():
|
||||
"""Test OntologyAdapter initialization with nonexistent file."""
|
||||
adapter = OntologyResolver(ontology_file="nonexistent.owl")
|
||||
adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
|
||||
assert adapter.graph is None
|
||||
|
||||
|
||||
|
|
@ -27,7 +28,7 @@ def test_build_lookup():
|
|||
|
||||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -50,7 +51,7 @@ def test_find_closest_match_exact():
|
|||
g.add((ns.Car, RDF.type, OWL.Class))
|
||||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy():
|
|||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
g.add((ns.BMW, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -92,7 +93,7 @@ def test_find_closest_match_no_match():
|
|||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
g.add((ns.BMW, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib():
|
|||
"""Test get_subgraph returns empty results for a non-existent node."""
|
||||
g = Graph()
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib():
|
|||
g.add((ns.VW, owns, ns.Audi))
|
||||
g.add((ns.VW, owns, ns.Porsche))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib():
|
|||
"""Test that refresh_lookup rebuilds the lookup dict into a new object."""
|
||||
g = Graph()
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue