From 2f225c9e036c6444da73d641f89fe63e96c5d438 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:54:33 +0200 Subject: [PATCH] feat: adds ontology resolver env handling --- cognee/api/v1/cognify/cognify.py | 38 ++++++++++++++-- .../ontology/get_default_ontology_resolver.py | 37 ++++++++++++++- .../modules/ontology/ontology_env_config.py | 45 +++++++++++++++++++ cognee/tasks/graph/extract_graph_from_data.py | 22 ++++++++- 4 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 cognee/modules/ontology/ontology_env_config.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index f4bd5d1b4..1292d243a 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from typing import Union, Optional from uuid import UUID +from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.shared.logging_utils import get_logger from cognee.shared.data_models import KnowledgeGraph from cognee.infrastructure.llm import get_max_chunk_tokens @@ -11,7 +12,10 @@ from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.ontology.ontology_config import Config -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import ( + get_default_ontology_resolver, + get_ontology_resolver_from_env, +) from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -188,7 +192,21 @@ async def cognify( - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ if config is None: - config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + 
ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) @@ -222,7 +240,21 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's custom_prompt: Optional[str] = None, ) -> list[Task]: if config is None: - config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } default_tasks = [ Task(classify_documents), diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py index ae10fbde5..f9aebe59a 100644 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -1,6 +1,41 @@ +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy -def get_default_ontology_resolver() -> RDFLibOntologyResolver: +def get_default_ontology_resolver() -> BaseOntologyResolver: return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy()) + + +def get_ontology_resolver_from_env( + 
ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = "" +) -> BaseOntologyResolver: + """ + Create and return an ontology resolver instance based on environment parameters. + + Currently, this function supports only the RDFLib-based ontology resolver + with a fuzzy matching strategy. + + Args: + ontology_resolver (str): The ontology resolver type to use. + Supported value: "rdflib". + matching_strategy (str): The matching strategy to apply. + Supported value: "fuzzy". + ontology_file_path (str): Path to the ontology file required for the resolver. + + Returns: + BaseOntologyResolver: An instance of the requested ontology resolver. + + Raises: + EnvironmentError: If the provided resolver or strategy is unsupported, + or if required parameters are missing. + """ + if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path: + return RDFLibOntologyResolver( + matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path + ) + else: + raise EnvironmentError( + f"Unsupported ontology resolver: {ontology_resolver}. " + f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy." + ) diff --git a/cognee/modules/ontology/ontology_env_config.py b/cognee/modules/ontology/ontology_env_config.py new file mode 100644 index 000000000..a351b35e7 --- /dev/null +++ b/cognee/modules/ontology/ontology_env_config.py @@ -0,0 +1,45 @@ +"""This module contains the configuration for ontology handling.""" + +from functools import lru_cache +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class OntologyEnvConfig(BaseSettings): + """ + Represents the configuration for ontology handling, including parameters for + ontology file storage and resolution/matching strategies. 
+ + Public methods: + - to_dict + + Instance variables: + - ontology_resolver + - matching_strategy + - ontology_file_path + - model_config + """ + + ontology_resolver: str = "rdflib" + matching_strategy: str = "fuzzy" + ontology_file_path: str = "" + + model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True) + + def to_dict(self) -> dict: + """ + Return the configuration as a dictionary. + """ + return { + "ontology_resolver": self.ontology_resolver, + "matching_strategy": self.matching_strategy, + "ontology_file_path": self.ontology_file_path, + } + + +@lru_cache +def get_ontology_env_config(): + """ + Retrieve the ontology configuration. This function utilizes caching to return a + singleton instance of the OntologyEnvConfig class for efficiency. + """ + return OntologyEnvConfig() diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 391c6fabe..e4dafe4e7 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -3,9 +3,13 @@ from typing import Type, List, Optional from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.ontology.ontology_env_config import get_ontology_env_config from cognee.tasks.storage.add_data_points import add_data_points from cognee.modules.ontology.ontology_config import Config -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import ( + get_default_ontology_resolver, + get_ontology_resolver_from_env, +) from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -124,7 +128,21 @@ async def extract_graph_from_data( # Extract resolver from config if provided, otherwise get default if config is None: - 
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + ontology_config = get_ontology_env_config() + if ( + ontology_config.ontology_file_path + and ontology_config.ontology_resolver + and ontology_config.matching_strategy + ): + config: Config = { + "ontology_config": { + "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict()) + } + } + else: + config: Config = { + "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} + } ontology_resolver = config["ontology_config"]["ontology_resolver"]