feat: adds new config structure based on requirements

This commit is contained in:
hajdul88 2025-09-18 17:24:23 +02:00
parent c42755b92e
commit 94373e5a01
10 changed files with 85 additions and 145 deletions

View file

@ -10,8 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.ontology_config import OntologyConfig from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.tasks.documents import ( from cognee.tasks.documents import (
@ -40,7 +40,7 @@ async def cognify(
graph_model: BaseModel = KnowledgeGraph, graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker, chunker=TextChunker,
chunk_size: int = None, chunk_size: int = None,
ontology_config: OntologyConfig = None, config: Config = None,
vector_db_config: dict = None, vector_db_config: dict = None,
graph_db_config: dict = None, graph_db_config: dict = None,
run_in_background: bool = False, run_in_background: bool = False,
@ -101,8 +101,6 @@ async def cognify(
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2) Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models. Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge. Smaller chunks = more granular but potentially fragmented knowledge.
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
Useful for specialized fields like medical or legal documents.
vector_db_config: Custom vector database configuration for embeddings storage. vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage. graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately. run_in_background: If True, starts processing asynchronously and returns immediately.
@ -189,14 +187,14 @@ async def cognify(
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
""" """
if ontology_config is None: if config is None:
ontology_config = get_ontology_resolver() config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
if temporal_cognify: if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size) tasks = await get_temporal_tasks(user, chunker, chunk_size)
else: else:
tasks = await get_default_tasks( tasks = await get_default_tasks(
user, graph_model, chunker, chunk_size, ontology_config, custom_prompt user, graph_model, chunker, chunk_size, config, custom_prompt
) )
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@ -220,11 +218,12 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model: BaseModel = KnowledgeGraph, graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker, chunker=TextChunker,
chunk_size: int = None, chunk_size: int = None,
ontology_config: OntologyConfig = None, config: Config = None,
custom_prompt: Optional[str] = None, custom_prompt: Optional[str] = None,
) -> list[Task]: ) -> list[Task]:
if ontology_config is None: if config is None:
ontology_config = get_ontology_resolver() config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
default_tasks = [ default_tasks = [
Task(classify_documents), Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]), Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@ -236,7 +235,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task( Task(
extract_graph_from_data, extract_graph_from_data,
graph_model=graph_model, graph_model=graph_model,
ontology_config=ontology_config, config=config,
custom_prompt=custom_prompt, custom_prompt=custom_prompt,
task_config={"batch_size": 10}, task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks. ), # Generate knowledge graphs from the document chunks.

View file

@ -7,9 +7,10 @@ from cognee.modules.engine.utils import (
generate_node_id, generate_node_id,
generate_node_name, generate_node_name,
) )
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.shared.data_models import KnowledgeGraph from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def _create_node_key(node_id: str, category: str) -> str: def _create_node_key(node_id: str, category: str) -> str:
@ -278,7 +279,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges( def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk], data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph], chunk_graphs: list[KnowledgeGraph],
ontology_resolver: RDFLibOntologyResolver = None, ontology_resolver: BaseOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None, existing_edges_map: Optional[dict[str, bool]] = None,
): ):
""" """
@ -321,8 +322,7 @@ def expand_with_nodes_and_edges(
existing_edges_map = {} existing_edges_map = {}
if ontology_resolver is None: if ontology_resolver is None:
config = get_ontology_resolver() ontology_resolver = get_default_ontology_resolver()
ontology_resolver = config["resolver"]
added_nodes_map = {} added_nodes_map = {}
added_ontology_nodes_map = {} added_ontology_nodes_map = {}

View file

@ -0,0 +1,6 @@
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
def get_default_ontology_resolver() -> RDFLibOntologyResolver:
    """Create the ontology resolver used when no custom one is supplied.

    Returns:
        A new ``RDFLibOntologyResolver`` built with ``ontology_file=None`` and a
        freshly constructed ``FuzzyMatchingStrategy`` as its matching strategy.
    """
    fuzzy_strategy = FuzzyMatchingStrategy()
    return RDFLibOntologyResolver(matching_strategy=fuzzy_strategy, ontology_file=None)

View file

@ -1,35 +0,0 @@
from typing import Optional
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
from cognee.modules.ontology.ontology_config import OntologyConfig
def get_ontology_resolver(
    resolver: Optional[BaseOntologyResolver] = None,
    matching_strategy: Optional[MatchingStrategy] = None,
) -> OntologyConfig:
    """Build an ontology configuration from custom or default components.

    Args:
        resolver: Optional pre-configured ontology resolver instance. When given,
            it is stored in the configuration unchanged.
        matching_strategy: Optional matching strategy instance. When omitted, the
            resolver's own ``matching_strategy`` is used, or a new
            ``FuzzyMatchingStrategy`` if no resolver was given either.

    Returns:
        A configuration dict with the keys ``"resolver"`` and
        ``"matching_strategy"``.
    """
    config: OntologyConfig = {}

    if resolver is not None:
        config["resolver"] = resolver
        # Test against None explicitly: selecting with ``or`` would discard a
        # caller-supplied strategy object that happens to evaluate as falsy.
        config["matching_strategy"] = (
            matching_strategy if matching_strategy is not None else resolver.matching_strategy
        )
        return config

    # No resolver supplied: build the default RDFLib resolver around the chosen
    # strategy so the resolver and the config entry share the same instance.
    strategy = matching_strategy if matching_strategy is not None else FuzzyMatchingStrategy()
    config["resolver"] = RDFLibOntologyResolver(ontology_file=None, matching_strategy=strategy)
    config["matching_strategy"] = strategy
    return config

View file

@ -5,12 +5,20 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy
class OntologyConfig(TypedDict, total=False): class OntologyConfig(TypedDict, total=False):
"""Configuration for ontology resolver. """Configuration containing ontology resolver.
Attributes: Attributes:
resolver: The ontology resolver instance to use ontology_resolver: The ontology resolver instance to use
matching_strategy: The matching strategy to use
""" """
resolver: Optional[BaseOntologyResolver] ontology_resolver: Optional[BaseOntologyResolver]
matching_strategy: Optional[MatchingStrategy]
class Config(TypedDict, total=False):
    """Outermost configuration mapping.

    Declared with ``total=False``, so every key may be omitted.

    Attributes:
        ontology_config: Nested configuration that contains the ontology resolver.
    """

    ontology_config: Optional[OntologyConfig]

View file

@ -28,7 +28,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
self, self,
ontology_file: Optional[str] = None, ontology_file: Optional[str] = None,
matching_strategy: Optional[MatchingStrategy] = None, matching_strategy: Optional[MatchingStrategy] = None,
): ) -> None:
super().__init__(matching_strategy) super().__init__(matching_strategy)
self.ontology_file = ontology_file self.ontology_file = ontology_file
try: try:

View file

@ -4,8 +4,8 @@ from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.storage.add_data_points import add_data_points from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.ontology_config import OntologyConfig from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import ( from cognee.modules.graph.utils import (
@ -26,7 +26,7 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk], data_chunks: list[DocumentChunk],
chunk_graphs: list, chunk_graphs: list,
graph_model: Type[BaseModel], graph_model: Type[BaseModel],
ontology_adapter: RDFLibOntologyResolver, ontology_resolver: RDFLibOntologyResolver,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases.""" """Updates DocumentChunk objects, integrates data points and edges into databases."""
@ -38,9 +38,9 @@ async def integrate_chunk_graphs(
) )
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model) raise InvalidGraphModelError(graph_model)
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"): if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
raise InvalidOntologyAdapterError( raise InvalidOntologyAdapterError(
type(ontology_adapter).__name__ if ontology_adapter else "None" type(ontology_resolver).__name__ if ontology_resolver else "None"
) )
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
@ -57,7 +57,7 @@ async def integrate_chunk_graphs(
) )
graph_nodes, graph_edges = expand_with_nodes_and_edges( graph_nodes, graph_edges = expand_with_nodes_and_edges(
data_chunks, chunk_graphs, ontology_adapter, existing_edges_map data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
) )
if len(graph_nodes) > 0: if len(graph_nodes) > 0:
@ -72,7 +72,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data( async def extract_graph_from_data(
data_chunks: List[DocumentChunk], data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel], graph_model: Type[BaseModel],
ontology_config: OntologyConfig = None, config: Config = None,
custom_prompt: Optional[str] = None, custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
""" """
@ -104,9 +104,9 @@ async def extract_graph_from_data(
] ]
# Extract resolver from config if provided, otherwise get default # Extract resolver from config if provided, otherwise get default
if ontology_config is None: if config is None:
ontology_config = get_ontology_resolver() config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
ontology_adapter = ontology_config["resolver"] ontology_resolver = config["ontology_config"]["ontology_resolver"]
return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)

View file

@ -2,13 +2,13 @@ import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def test_ontology_adapter_initialization_success(): def test_ontology_adapter_initialization_success():
"""Test successful initialization of OntologyAdapter.""" """Test successful initialization of OntologyAdapter."""
config = get_ontology_resolver() config = get_default_ontology_resolver()
adapter = config["resolver"] adapter = config["resolver"]
adapter.build_lookup() adapter.build_lookup()
@ -108,7 +108,7 @@ def test_get_subgraph_no_match_rdflib():
"""Test get_subgraph returns empty results for a non-existent node.""" """Test get_subgraph returns empty results for a non-existent node."""
g = Graph() g = Graph()
config = get_ontology_resolver() config = get_default_ontology_resolver()
resolver = config["resolver"] resolver = config["resolver"]
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -167,7 +167,7 @@ def test_refresh_lookup_rdflib():
"""Test that refresh_lookup rebuilds the lookup dict into a new object.""" """Test that refresh_lookup rebuilds the lookup dict into a new object."""
g = Graph() g = Graph()
config = get_ontology_resolver() config = get_default_ontology_resolver()
resolver = config["resolver"] resolver = config["resolver"]
resolver.graph = g resolver.graph = g
resolver.build_lookup() resolver.build_lookup()
@ -272,89 +272,47 @@ def test_base_ontology_resolver_custom_matching_strategy():
def test_ontology_config_structure(): def test_ontology_config_structure():
"""Test OntologyConfig TypedDict structure.""" """Test TypedDict structure for ontology configuration."""
from cognee.modules.ontology.ontology_config import OntologyConfig from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = RDFLibOntologyResolver() resolver = RDFLibOntologyResolver()
matching_strategy = FuzzyMatchingStrategy() matching_strategy = FuzzyMatchingStrategy()
config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy} config: Config = {"ontology_config": {"ontology_resolver": resolver}}
assert config["resolver"] == resolver assert config["ontology_config"]["ontology_resolver"] == resolver
assert config["matching_strategy"] == matching_strategy
def test_get_ontology_resolver_default(): def test_get_ontology_resolver_default():
"""Test get_ontology_resolver returns default configuration.""" """Test get_ontology_resolver returns default configuration."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
config = get_ontology_resolver() config: Config = get_default_ontology_resolver()
assert isinstance(config["resolver"], RDFLibOntologyResolver) assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver)
assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) assert isinstance(
assert config["resolver"].matching_strategy == config["matching_strategy"] config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy
)
def test_get_ontology_resolver_custom_resolver(): def test_get_default_ontology_resolver():
"""Test get_ontology_resolver with custom resolver.""" """Test get_default_ontology_resolver returns default configuration."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.get_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") config: Config = get_default_ontology_resolver()
config = get_ontology_resolver(resolver=custom_resolver)
assert config["resolver"] == custom_resolver assert isinstance(config["ontology_config"]["ontology_resolver"], RDFLibOntologyResolver)
assert config["matching_strategy"] == custom_resolver.matching_strategy assert isinstance(
assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) config["ontology_config"]["ontology_resolver"].matching_strategy, FuzzyMatchingStrategy
)
def test_get_ontology_resolver_custom_matching_strategy():
"""Test get_ontology_resolver with custom matching strategy."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
custom_strategy = FuzzyMatchingStrategy(cutoff=0.9)
config = get_ontology_resolver(matching_strategy=custom_strategy)
assert isinstance(config["resolver"], RDFLibOntologyResolver)
assert config["matching_strategy"] == custom_strategy
assert config["resolver"].matching_strategy == custom_strategy
def test_get_ontology_resolver_both_custom():
"""Test get_ontology_resolver with both custom resolver and matching strategy."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl")
custom_strategy = FuzzyMatchingStrategy(cutoff=0.9)
config = get_ontology_resolver(resolver=custom_resolver, matching_strategy=custom_strategy)
assert config["resolver"] == custom_resolver
assert config["matching_strategy"] == custom_strategy
def test_get_ontology_resolver_only_resolver_uses_resolver_strategy():
"""Test that when only resolver is passed, it uses the resolver's matching strategy."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
custom_strategy = FuzzyMatchingStrategy(cutoff=0.8)
custom_resolver = RDFLibOntologyResolver(matching_strategy=custom_strategy)
config = get_ontology_resolver(resolver=custom_resolver)
assert config["resolver"] == custom_resolver
assert config["matching_strategy"] == custom_strategy
assert config["matching_strategy"] == custom_resolver.matching_strategy
def test_rdflib_ontology_resolver_uses_matching_strategy(): def test_rdflib_ontology_resolver_uses_matching_strategy():

View file

@ -5,8 +5,8 @@ import cognee
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.shared.logging_utils import setup_logging from cognee.shared.logging_utils import setup_logging
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
text_1 = """ text_1 = """
1. Audi 1. Audi
@ -62,12 +62,14 @@ async def main():
os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl" os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
) )
# Create ontology config with custom ontology file # Create full config structure manually
ontology_config = get_ontology_resolver( config: Config = {
resolver=RDFLibOntologyResolver(ontology_file=ontology_path) "ontology_config": {
) "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
await cognee.cognify(ontology_config=ontology_config) await cognee.cognify(config=config)
print("Knowledge with ontology created.") print("Knowledge with ontology created.")
# Step 4: Query insights # Step 4: Query insights

View file

@ -5,8 +5,8 @@ import os
import textwrap import textwrap
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
async def run_pipeline(ontology_path=None): async def run_pipeline(ontology_path=None):
@ -19,11 +19,13 @@ async def run_pipeline(ontology_path=None):
await cognee.add(scientific_papers_dir) await cognee.add(scientific_papers_dir)
ontology_config = get_ontology_resolver( config: Config = {
resolver=RDFLibOntologyResolver(ontology_file=ontology_path) "ontology_config": {
) "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
pipeline_run = await cognee.cognify(ontology_config=ontology_config) pipeline_run = await cognee.cognify(config=config)
return pipeline_run return pipeline_run