feat: adds ontology resolver env handling

This commit is contained in:
hajdul88 2025-09-19 12:54:33 +02:00
parent 95d8992140
commit 2f225c9e03
4 changed files with 136 additions and 6 deletions

View file

@ -3,6 +3,7 @@ from pydantic import BaseModel
from typing import Union, Optional
from uuid import UUID
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.shared.logging_utils import get_logger
from cognee.shared.data_models import KnowledgeGraph
from cognee.infrastructure.llm import get_max_chunk_tokens
@ -11,7 +12,10 @@ from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.get_default_ontology_resolver import (
get_default_ontology_resolver,
get_ontology_resolver_from_env,
)
from cognee.modules.users.models import User
from cognee.tasks.documents import (
@ -188,7 +192,21 @@ async def cognify(
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
"""
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
@ -222,7 +240,21 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
custom_prompt: Optional[str] = None,
) -> list[Task]:
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
default_tasks = [
Task(classify_documents),

View file

@ -1,6 +1,41 @@
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
def get_default_ontology_resolver() -> RDFLibOntologyResolver:
def get_default_ontology_resolver() -> BaseOntologyResolver:
    """
    Build the fallback ontology resolver.

    Returns:
        BaseOntologyResolver: An RDFLib resolver constructed with no ontology
        file (`ontology_file=None`) and a fuzzy matching strategy.
    """
    fuzzy_strategy = FuzzyMatchingStrategy()
    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=fuzzy_strategy)
def get_ontology_resolver_from_env(
    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
) -> BaseOntologyResolver:
    """
    Create and return an ontology resolver instance based on environment parameters.

    Currently, this function supports only the RDFLib-based ontology resolver
    with a fuzzy matching strategy.

    Args:
        ontology_resolver (str): The ontology resolver type to use.
            Supported value: "rdflib".
        matching_strategy (str): The matching strategy to apply.
            Supported value: "fuzzy".
        ontology_file_path (str): Path to the ontology file required for the resolver.

    Returns:
        BaseOntologyResolver: An instance of the requested ontology resolver.

    Raises:
        EnvironmentError: If the provided resolver or strategy is unsupported,
            or if the ontology file path is missing. The message names the
            parameter that was rejected.
    """
    # Each requirement is checked on its own so the error points at the value
    # that is actually wrong instead of always blaming the resolver name.
    if ontology_resolver != "rdflib":
        raise EnvironmentError(
            f"Unsupported ontology resolver: {ontology_resolver}. "
            "Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
        )

    if matching_strategy != "fuzzy":
        raise EnvironmentError(
            f"Unsupported matching strategy: {matching_strategy}. "
            "Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
        )

    if not ontology_file_path:
        raise EnvironmentError(
            "Missing ontology_file_path: an ontology file is required to create "
            "the RdfLib resolver with FuzzyMatchingStrategy."
        )

    return RDFLibOntologyResolver(
        matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
    )

View file

@ -0,0 +1,45 @@
"""This module contains the configuration for ontology handling."""
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class OntologyEnvConfig(BaseSettings):
    """
    Settings that select which ontology resolver, matching strategy and
    ontology file are used.

    The class is a pydantic-settings `BaseSettings` configured with
    `env_file=".env"`; any field that is not supplied keeps the default
    declared below.

    Public methods:

    - to_dict

    Instance variables:

    - ontology_resolver
    - matching_strategy
    - ontology_file_path
    - model_config
    """

    ontology_resolver: str = "rdflib"
    matching_strategy: str = "fuzzy"
    ontology_file_path: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)

    def to_dict(self) -> dict:
        """
        Return the three ontology settings as a plain dictionary keyed by field name.
        """
        exported_fields = ("ontology_resolver", "matching_strategy", "ontology_file_path")
        return {field_name: getattr(self, field_name) for field_name in exported_fields}
@lru_cache
def get_ontology_env_config():
    """
    Return the ontology environment configuration.

    The function is wrapped in `lru_cache` and takes no arguments, so the
    `OntologyEnvConfig` instance built on the first call is reused by later calls.
    """
    env_config = OntologyEnvConfig()
    return env_config

View file

@ -3,9 +3,13 @@ from typing import Type, List, Optional
from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.get_default_ontology_resolver import (
get_default_ontology_resolver,
get_ontology_resolver_from_env,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
@ -124,7 +128,21 @@ async def extract_graph_from_data(
# Extract resolver from config if provided, otherwise get default
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
ontology_resolver = config["ontology_config"]["ontology_resolver"]