From 94373e5a01d948af80756987f3be990ad9652f0e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:24:23 +0200 Subject: [PATCH] feat: adds new config structure based on requirements --- cognee/api/v1/cognify/cognify.py | 23 +++-- .../utils/expand_with_nodes_and_edges.py | 8 +- .../ontology/get_default_ontology_resolver.py | 6 ++ .../modules/ontology/get_ontology_resolver.py | 35 -------- cognee/modules/ontology/ontology_config.py | 18 ++-- .../rdf_xml/RDFLibOntologyResolver.py | 2 +- cognee/tasks/graph/extract_graph_from_data.py | 22 ++--- .../modules/ontology/test_ontology_adapter.py | 90 +++++-------------- examples/python/ontology_demo_example.py | 14 +-- examples/python/ontology_demo_example_2.py | 12 +-- 10 files changed, 85 insertions(+), 145 deletions(-) create mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py delete mode 100644 cognee/modules/ontology/get_ontology_resolver.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 2cb844d12..f4bd5d1b4 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,8 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.ontology_config import OntologyConfig -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.ontology_config import Config +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -40,7 +40,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = None, + config: Config = None, vector_db_config: dict = None, graph_db_config: dict = None, run_in_background: bool = False, @@ -101,8 +101,6 @@ async def cognify( Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Default limits: ~512-8192 tokens depending on models. Smaller chunks = more granular but potentially fragmented knowledge. - ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types. - Useful for specialized fields like medical or legal documents. vector_db_config: Custom vector database configuration for embeddings storage. graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. @@ -189,14 +187,14 @@ async def cognify( - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) else: tasks = await get_default_tasks( - user, graph_model, chunker, chunk_size, ontology_config, custom_prompt + user, graph_model, chunker, chunk_size, config, custom_prompt ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -220,11 +218,12 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = None, + config: Config = None, custom_prompt: Optional[str] = None, ) -> list[Task]: - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} + default_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), @@ -236,7 +235,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_config=ontology_config, + config=config, custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the document chunks. diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index e18860744..ef72cd0e1 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -7,9 +7,10 @@ from cognee.modules.engine.utils import ( generate_node_id, generate_node_name, ) +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -278,7 +279,7 @@ def _process_graph_edges( def expand_with_nodes_and_edges( data_chunks: list[DocumentChunk], chunk_graphs: list[KnowledgeGraph], - ontology_resolver: RDFLibOntologyResolver = None, + ontology_resolver: BaseOntologyResolver = None, existing_edges_map: Optional[dict[str, bool]] = None, ): """ @@ -321,8 +322,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - config = get_ontology_resolver() - ontology_resolver = config["resolver"] + ontology_resolver = get_default_ontology_resolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py new file mode 100644 index 000000000..ae10fbde5 --- /dev/null +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -0,0 +1,6 @@ +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + +def get_default_ontology_resolver() -> RDFLibOntologyResolver: + return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy()) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py deleted file mode 100644 index d75928af9..000000000 --- a/cognee/modules/ontology/get_ontology_resolver.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy -from cognee.modules.ontology.ontology_config import OntologyConfig - - -def get_ontology_resolver( - resolver: Optional[BaseOntologyResolver] = None, - matching_strategy: Optional[MatchingStrategy] = None, -) -> OntologyConfig: - """Get ontology resolver configuration with default or custom objects. - - Args: - resolver: Optional pre-configured ontology resolver instance - matching_strategy: Optional matching strategy instance - - Returns: - Ontology configuration with default RDFLib resolver and fuzzy matching strategy, - or custom objects if provided - """ - config: OntologyConfig = {} - - if resolver is not None: - config["resolver"] = resolver - config["matching_strategy"] = matching_strategy or resolver.matching_strategy - else: - default_strategy = matching_strategy or FuzzyMatchingStrategy() - config["resolver"] = RDFLibOntologyResolver( - ontology_file=None, matching_strategy=default_strategy - ) - config["matching_strategy"] = default_strategy - - return config diff --git a/cognee/modules/ontology/ontology_config.py b/cognee/modules/ontology/ontology_config.py index e28da9f92..397411edc 100644 --- a/cognee/modules/ontology/ontology_config.py +++ b/cognee/modules/ontology/ontology_config.py @@ -5,12 +5,20 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy class OntologyConfig(TypedDict, total=False): - """Configuration for ontology resolver. + """Configuration containing ontology resolver. Attributes: - resolver: The ontology resolver instance to use - matching_strategy: The matching strategy to use + ontology_resolver: The ontology resolver instance to use """ - resolver: Optional[BaseOntologyResolver] - matching_strategy: Optional[MatchingStrategy] + ontology_resolver: Optional[BaseOntologyResolver] + + +class Config(TypedDict, total=False): + """Top-level configuration dictionary. + + Attributes: + ontology_config: Configuration containing ontology resolver + """ + + ontology_config: Optional[OntologyConfig] diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index c6b3e22be..2a7a03751 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -28,7 +28,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver): self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None, - ): + ) -> None: super().__init__(matching_strategy) self.ontology_file = ontology_file try: diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index f0ef9c7f9..7c049546c 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,8 +4,8 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.ontology_config import OntologyConfig -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.ontology_config import Config +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -26,7 +26,7 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_adapter: RDFLibOntologyResolver, + ontology_resolver: RDFLibOntologyResolver, ) -> List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" @@ -38,9 +38,9 @@ async def integrate_chunk_graphs( ) if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): raise InvalidGraphModelError(graph_model) - if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"): + if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"): raise InvalidOntologyAdapterError( - type(ontology_adapter).__name__ if ontology_adapter else "None" + type(ontology_resolver).__name__ if ontology_resolver else "None" ) graph_engine = await get_graph_engine() @@ -57,7 +57,7 @@ async def integrate_chunk_graphs( ) graph_nodes, graph_edges = expand_with_nodes_and_edges( - data_chunks, chunk_graphs, ontology_adapter, existing_edges_map + data_chunks, chunk_graphs, ontology_resolver, existing_edges_map ) if len(graph_nodes) > 0: @@ -72,7 +72,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_config: OntologyConfig = None, + config: Config = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ @@ -104,9 +104,9 @@ async def extract_graph_from_data( ] # Extract resolver from config if provided, otherwise get default - if ontology_config is None: - ontology_config = get_ontology_resolver() + if config is None: + config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}} - ontology_adapter = ontology_config["resolver"] + ontology_resolver = config["ontology_config"]["ontology_resolver"] - return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 9b7eeeae0..88e9b314d 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,13 +2,13 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - config = get_ontology_resolver() + config = get_default_ontology_resolver() adapter = config["resolver"] adapter.build_lookup() @@ -108,7 +108,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - config = get_ontology_resolver() + config = get_default_ontology_resolver() resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -167,7 +167,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - config = get_ontology_resolver() + config = get_default_ontology_resolver() resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -272,89 +272,47 @@ def test_base_ontology_resolver_custom_matching_strategy(): def test_ontology_config_structure(): - """Test OntologyConfig TypedDict structure.""" - from cognee.modules.ontology.ontology_config import OntologyConfig + """Test TypedDict structure for ontology configuration.""" + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy resolver = RDFLibOntologyResolver() matching_strategy = FuzzyMatchingStrategy() - config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy} + config: Config = {"ontology_config": {"ontology_resolver": resolver}} - assert config["resolver"] == resolver - assert config["matching_strategy"] == matching_strategy + assert config["ontology_config"]["ontology_resolver"] == resolver def test_get_ontology_resolver_default(): """Test get_ontology_resolver returns default configuration.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - config = get_ontology_resolver() + config: Config = get_default_ontology_resolver() - assert isinstance(config["resolver"], RDFLibOntologyResolver) - assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) - assert config["resolver"].matching_strategy == config["matching_strategy"] + assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) + assert isinstance( + config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy + ) -def test_get_ontology_resolver_custom_resolver(): - """Test get_ontology_resolver with custom resolver.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +def test_get_default_ontology_resolver(): + """Test get_default_ontology_resolver returns default configuration.""" + from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver + from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") - config = get_ontology_resolver(resolver=custom_resolver) + config: Config = get_default_ontology_resolver() - assert config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_resolver.matching_strategy - assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) - - -def test_get_ontology_resolver_custom_matching_strategy(): - """Test get_ontology_resolver with custom matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) - config = get_ontology_resolver(matching_strategy=custom_strategy) - - assert isinstance(config["resolver"], RDFLibOntologyResolver) - assert config["matching_strategy"] == custom_strategy - assert config["resolver"].matching_strategy == custom_strategy - - -def test_get_ontology_resolver_both_custom(): - """Test get_ontology_resolver with both custom resolver and matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") - custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) - config = get_ontology_resolver(resolver=custom_resolver, matching_strategy=custom_strategy) - - assert config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_strategy - - -def test_get_ontology_resolver_only_resolver_uses_resolver_strategy(): - """Test that when only resolver is passed, it uses the resolver's matching strategy.""" - from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver - from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - custom_strategy = FuzzyMatchingStrategy(cutoff=0.8) - custom_resolver = RDFLibOntologyResolver(matching_strategy=custom_strategy) - - config = get_ontology_resolver(resolver=custom_resolver) - - assert config["resolver"] == custom_resolver - assert config["matching_strategy"] == custom_strategy - assert config["matching_strategy"] == custom_resolver.matching_strategy + assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver) + assert isinstance( + config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy + ) def test_rdflib_ontology_resolver_uses_matching_strategy(): diff --git a/examples/python/ontology_demo_example.py b/examples/python/ontology_demo_example.py index ea1ab8b72..5b18e6ed4 100644 --- a/examples/python/ontology_demo_example.py +++ b/examples/python/ontology_demo_example.py @@ -5,8 +5,8 @@ import cognee from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import Config text_1 = """ 1. Audi @@ -62,12 +62,14 @@ async def main(): os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl" ) - # Create ontology config with custom ontology file - ontology_config = get_ontology_resolver( - resolver=RDFLibOntologyResolver(ontology_file=ontology_path) - ) + # Create full config structure manually + config: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path) + } + } - await cognee.cognify(ontology_config=ontology_config) + await cognee.cognify(config=config) print("Knowledge with ontology created.") # Step 4: Query insights diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py index e897da2e5..01bcd9ae4 100644 --- a/examples/python/ontology_demo_example_2.py +++ b/examples/python/ontology_demo_example_2.py @@ -5,8 +5,8 @@ import os import textwrap from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph -from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import Config async def run_pipeline(ontology_path=None): @@ -19,11 +19,13 @@ async def run_pipeline(ontology_path=None): await cognee.add(scientific_papers_dir) - ontology_config = get_ontology_resolver( - resolver=RDFLibOntologyResolver(ontology_file=ontology_path) - ) + config: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path) + } + } - pipeline_run = await cognee.cognify(ontology_config=ontology_config) + pipeline_run = await cognee.cognify(config=config) return pipeline_run