cognee/cognee/modules/ontology/rdf_xml/OntologyResolver.py
hajdul88 9d5835042a
Chore: Changes empty ontology logging from warning to info (#964)
<!-- .github/pull_request_template.md -->

## Description
Changes empty ontology logging from warning to info

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
2025-06-11 09:52:51 +02:00

140 lines
5.5 KiB
Python

import os
import difflib
from cognee.shared.logging_utils import get_logger
from collections import deque
from typing import List, Tuple, Dict, Optional, Any
from owlready2 import get_ontology, ClassConstruct, Ontology, Thing
from cognee.modules.ontology.exceptions import (
OntologyInitializationError,
FindClosestMatchError,
GetSubgraphError,
)
# Module-level logger shared by everything in this file; it is requested from
# cognee's logging utilities under the name "OntologyAdapter".
logger = get_logger("OntologyAdapter")
class OntologyResolver:
    """Match free-text names to ontology entities and extract their subgraphs.

    On construction the ontology is loaded from ``ontology_file`` when that
    path exists; otherwise an ontology object is created for ``fallback_url``
    without calling ``.load()`` on it. A lookup table keyed by normalized
    entity name is then built for the ontology's classes and individuals.
    """

    def __init__(
        self,
        ontology_file: Optional[str] = None,
        fallback_url: str = "http://example.org/empty_ontology",
    ):
        """Load the ontology (or the fallback) and build the name lookup.

        Args:
            ontology_file: Path to an ontology file. When ``None``, empty, or
                not present on disk, the fallback ontology is used instead.
            fallback_url: IRI passed to ``get_ontology`` when no file is used.

        Raises:
            OntologyInitializationError: If loading the ontology or building
                the lookup fails for any reason.
        """
        self.ontology_file = ontology_file
        try:
            if ontology_file and os.path.exists(ontology_file):
                self.ontology: Ontology = get_ontology(ontology_file).load()
                logger.info("Ontology loaded successfully from file: %s", ontology_file)
            else:
                logger.info(
                    "Ontology file '%s' not found. Using fallback ontology at %s",
                    ontology_file,
                    fallback_url,
                )
                self.ontology = get_ontology(fallback_url)
            self.build_lookup()
        except Exception as e:
            logger.error("Failed to load ontology", exc_info=e)
            raise OntologyInitializationError() from e

    @staticmethod
    def _normalize_name(name: str) -> str:
        """Return the lookup key for *name*.

        Surrounding whitespace is removed *before* inner spaces are turned
        into underscores, so ``"  Sports Car "`` becomes ``"sports_car"``
        rather than picking up leading/trailing underscores.
        """
        return name.strip().lower().replace(" ", "_")

    def build_lookup(self):
        """Build ``self.lookup`` from the ontology's classes and individuals.

        The result maps the categories ``"classes"`` and ``"individuals"`` to
        dictionaries of normalized entity name -> entity object.

        Raises:
            RuntimeError: If the lookup cannot be built.
        """
        try:
            self.lookup: Dict[str, Dict[str, Thing]] = {
                "classes": {
                    self._normalize_name(cls.name): cls for cls in self.ontology.classes()
                },
                "individuals": {
                    self._normalize_name(ind.name): ind for ind in self.ontology.individuals()
                },
            }
            logger.info(
                "Lookup built: %d classes, %d individuals",
                len(self.lookup["classes"]),
                len(self.lookup["individuals"]),
            )
        except Exception as e:
            logger.error("Failed to build lookup dictionary: %s", str(e))
            raise RuntimeError("Lookup build failed") from e

    def refresh_lookup(self):
        """Rebuild the name lookup from the current state of the ontology."""
        self.build_lookup()
        logger.info("Ontology lookup refreshed.")

    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        """Return the lookup key in *category* that best matches *name*.

        An exact match on the normalized name wins; otherwise the single best
        fuzzy match with a similarity ratio of at least 0.8 is returned.

        Args:
            name: Free-text name to resolve.
            category: Lookup category, e.g. ``"classes"`` or ``"individuals"``.
                An unknown category is treated as having no entries.

        Returns:
            The matching normalized key, or ``None`` when nothing is close
            enough.

        Raises:
            FindClosestMatchError: If matching fails unexpectedly.
        """
        try:
            normalized_name = self._normalize_name(name)
            candidates = self.lookup.get(category, {})
            # Dict membership is O(1); no need to scan a list of keys.
            if normalized_name in candidates:
                return normalized_name
            best_match = difflib.get_close_matches(
                normalized_name, list(candidates), n=1, cutoff=0.8
            )
            return best_match[0] if best_match else None
        except Exception as e:
            logger.error("Error in find_closest_match: %s", str(e))
            raise FindClosestMatchError() from e

    def get_subgraph(
        self, node_name: str, node_type: str = "individuals"
    ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
        """Collect the ontology subgraph reachable from the entity matching *node_name*.

        Starting from the closest match in the *node_type* category, the
        ontology is walked breadth-first along ``is_a`` links and object
        properties.

        Args:
            node_name: Free-text name of the start entity.
            node_type: Lookup category to search for the start entity.

        Returns:
            A tuple ``(nodes, edges, start_node)`` where ``nodes`` is the list
            of visited entities, ``edges`` is a list of
            ``(source_name, relation_name, target_name)`` triples and
            ``start_node`` is the matched entity. When no match is found the
            result is ``([], [], None)``.

        Raises:
            GetSubgraphError: If the traversal fails unexpectedly.
        """
        # A single set serves both as the result set and as the "already
        # queued" marker: every entity is queued exactly once, on first sight.
        nodes_set = set()
        edges: List[Tuple[str, str, str]] = []
        queue = deque()

        def _enqueue(entity: Any) -> None:
            """Record *entity* and schedule it for expansion if it is new."""
            if entity not in nodes_set:
                nodes_set.add(entity)
                queue.append(entity)

        try:
            closest_match = self.find_closest_match(name=node_name, category=node_type)
            if not closest_match:
                logger.info("No close match found for '%s' in category '%s'", node_name, node_type)
                return list(nodes_set), edges, None

            node = self.lookup[node_type].get(closest_match)
            if node is None:
                logger.info("Node '%s' not found in lookup.", closest_match)
                return list(nodes_set), edges, None

            logger.info("%s match was found for '%s' node", node.name, node_name)
            _enqueue(node)

            while queue:
                current_node = queue.popleft()

                if hasattr(current_node, "is_a"):
                    for parent in current_node.is_a:
                        if isinstance(parent, ClassConstruct):
                            # Class constructs are only followed when they
                            # wrap a named entity in ``.value``; all other
                            # constructs are skipped.
                            if hasattr(parent, "value") and hasattr(parent.value, "name"):
                                parent = parent.value
                            else:
                                continue
                        edges.append((current_node.name, "is_a", parent.name))
                        _enqueue(parent)

                for prop in self.ontology.object_properties():
                    # Outgoing edges: current_node --prop--> target.
                    for target in prop[current_node]:
                        edges.append((current_node.name, prop.name, target.name))
                        _enqueue(target)
                    # Incoming edges: source --prop--> current_node.
                    # NOTE(review): candidate sources are taken from
                    # ``prop.range`` exactly as before; confirm this is the
                    # intended set of sources (as opposed to the domain).
                    for source in prop.range:
                        if current_node in prop[source]:
                            edges.append((source.name, prop.name, current_node.name))
                            _enqueue(source)

            return list(nodes_set), edges, node
        except Exception as e:
            logger.error("Error in get_subgraph: %s", str(e))
            raise GetSubgraphError() from e