feat: adds new config structure based on requirements

This commit is contained in:
hajdul88 2025-09-18 17:24:23 +02:00
parent c42755b92e
commit 94373e5a01
10 changed files with 85 additions and 145 deletions

View file

@ -10,8 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.ontology_config import OntologyConfig
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.users.models import User
from cognee.tasks.documents import (
@ -40,7 +40,7 @@ async def cognify(
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
ontology_config: OntologyConfig = None,
config: Config = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
run_in_background: bool = False,
@ -101,8 +101,6 @@ async def cognify(
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge.
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
Useful for specialized fields like medical or legal documents.
vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately.
@ -189,14 +187,14 @@ async def cognify(
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
"""
if ontology_config is None:
ontology_config = get_ontology_resolver()
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
else:
tasks = await get_default_tasks(
user, graph_model, chunker, chunk_size, ontology_config, custom_prompt
user, graph_model, chunker, chunk_size, config, custom_prompt
)
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@ -220,11 +218,12 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
ontology_config: OntologyConfig = None,
config: Config = None,
custom_prompt: Optional[str] = None,
) -> list[Task]:
if ontology_config is None:
ontology_config = get_ontology_resolver()
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@ -236,7 +235,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task(
extract_graph_from_data,
graph_model=graph_model,
ontology_config=ontology_config,
config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks.

View file

@ -7,9 +7,10 @@ from cognee.modules.engine.utils import (
generate_node_id,
generate_node_name,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def _create_node_key(node_id: str, category: str) -> str:
@ -278,7 +279,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph],
ontology_resolver: RDFLibOntologyResolver = None,
ontology_resolver: BaseOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None,
):
"""
@ -321,8 +322,7 @@ def expand_with_nodes_and_edges(
existing_edges_map = {}
if ontology_resolver is None:
config = get_ontology_resolver()
ontology_resolver = config["resolver"]
ontology_resolver = get_default_ontology_resolver()
added_nodes_map = {}
added_ontology_nodes_map = {}

View file

@ -0,0 +1,6 @@
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
def get_default_ontology_resolver() -> RDFLibOntologyResolver:
    """Build the resolver used when the caller supplies no ontology settings.

    Returns:
        An ``RDFLibOntologyResolver`` constructed with ``ontology_file=None``
        and a newly created ``FuzzyMatchingStrategy``.
    """
    fuzzy_strategy = FuzzyMatchingStrategy()
    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=fuzzy_strategy)

View file

@ -1,35 +0,0 @@
from typing import Optional
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
from cognee.modules.ontology.ontology_config import OntologyConfig
def get_ontology_resolver(
    resolver: Optional[BaseOntologyResolver] = None,
    matching_strategy: Optional[MatchingStrategy] = None,
) -> OntologyConfig:
    """Assemble an ontology configuration from optional custom parts.

    Args:
        resolver: Pre-built ontology resolver to place in the configuration.
        matching_strategy: Matching strategy to record in the configuration.

    Returns:
        A mapping with ``"resolver"`` and ``"matching_strategy"`` keys. When no
        resolver is given, an ``RDFLibOntologyResolver`` without an ontology
        file is created around the given strategy (or a new
        ``FuzzyMatchingStrategy``). When a resolver is given, it is stored
        as-is and the recorded strategy falls back to the resolver's own.
    """
    if resolver is None:
        # Nothing supplied: build the RDFLib default around the chosen strategy.
        chosen_strategy = matching_strategy or FuzzyMatchingStrategy()
        chosen_resolver = RDFLibOntologyResolver(
            ontology_file=None, matching_strategy=chosen_strategy
        )
    else:
        # A supplied resolver is never modified; the strategy is only recorded
        # next to it in the returned mapping.
        chosen_resolver = resolver
        chosen_strategy = matching_strategy or resolver.matching_strategy

    result: OntologyConfig = {
        "resolver": chosen_resolver,
        "matching_strategy": chosen_strategy,
    }
    return result

View file

@ -5,12 +5,20 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy
class OntologyConfig(TypedDict, total=False):
"""Configuration for ontology resolver.
"""Configuration containing ontology resolver.
Attributes:
resolver: The ontology resolver instance to use
matching_strategy: The matching strategy to use
ontology_resolver: The ontology resolver instance to use
"""
resolver: Optional[BaseOntologyResolver]
matching_strategy: Optional[MatchingStrategy]
ontology_resolver: Optional[BaseOntologyResolver]
class Config(TypedDict, total=False):
    """Outermost configuration mapping; every key may be omitted.

    Attributes:
        ontology_config: Nested ``OntologyConfig`` mapping, or ``None``.
    """

    ontology_config: Optional[OntologyConfig]

View file

@ -28,7 +28,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
self,
ontology_file: Optional[str] = None,
matching_strategy: Optional[MatchingStrategy] = None,
):
) -> None:
super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:

View file

@ -4,8 +4,8 @@ from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.ontology_config import OntologyConfig
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
@ -26,7 +26,7 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk],
chunk_graphs: list,
graph_model: Type[BaseModel],
ontology_adapter: RDFLibOntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
@ -38,9 +38,9 @@ async def integrate_chunk_graphs(
)
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model)
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
raise InvalidOntologyAdapterError(
type(ontology_adapter).__name__ if ontology_adapter else "None"
type(ontology_resolver).__name__ if ontology_resolver else "None"
)
graph_engine = await get_graph_engine()
@ -57,7 +57,7 @@ async def integrate_chunk_graphs(
)
graph_nodes, graph_edges = expand_with_nodes_and_edges(
data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
)
if len(graph_nodes) > 0:
@ -72,7 +72,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel],
ontology_config: OntologyConfig = None,
config: Config = None,
custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]:
"""
@ -104,9 +104,9 @@ async def extract_graph_from_data(
]
# Extract resolver from config if provided, otherwise get default
if ontology_config is None:
ontology_config = get_ontology_resolver()
if config is None:
config: Config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}
ontology_adapter = ontology_config["resolver"]
ontology_resolver = config["ontology_config"]["ontology_resolver"]
return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter)
return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)

View file

@ -2,13 +2,13 @@ import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def test_ontology_adapter_initialization_success():
"""Test successful initialization of OntologyAdapter."""
config = get_ontology_resolver()
config = get_default_ontology_resolver()
adapter = config["resolver"]
adapter.build_lookup()
@ -108,7 +108,7 @@ def test_get_subgraph_no_match_rdflib():
"""Test get_subgraph returns empty results for a non-existent node."""
g = Graph()
config = get_ontology_resolver()
config = get_default_ontology_resolver()
resolver = config["resolver"]
resolver.graph = g
resolver.build_lookup()
@ -167,7 +167,7 @@ def test_refresh_lookup_rdflib():
"""Test that refresh_lookup rebuilds the lookup dict into a new object."""
g = Graph()
config = get_ontology_resolver()
config = get_default_ontology_resolver()
resolver = config["resolver"]
resolver.graph = g
resolver.build_lookup()
@ -272,89 +272,47 @@ def test_base_ontology_resolver_custom_matching_strategy():
def test_ontology_config_structure():
"""Test OntologyConfig TypedDict structure."""
from cognee.modules.ontology.ontology_config import OntologyConfig
"""Test TypedDict structure for ontology configuration."""
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = RDFLibOntologyResolver()
matching_strategy = FuzzyMatchingStrategy()
config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy}
config: Config = {"ontology_config": {"ontology_resolver": resolver}}
assert config["resolver"] == resolver
assert config["matching_strategy"] == matching_strategy
assert config["ontology_config"]["ontology_resolver"] == resolver
def test_get_ontology_resolver_default():
    """Test that the default resolver is an RDFLib resolver with fuzzy matching."""
    # The helper lives in its own module; the former get_ontology_resolver
    # module was removed, so importing from it would fail.
    from cognee.modules.ontology.get_default_ontology_resolver import (
        get_default_ontology_resolver,
    )
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    # get_default_ontology_resolver() returns the resolver itself, not a
    # Config mapping, so it must not be subscripted.
    resolver = get_default_ontology_resolver()

    assert isinstance(resolver, RDFLibOntologyResolver)
    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
def test_get_ontology_resolver_custom_resolver():
"""Test get_ontology_resolver with custom resolver."""
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
def test_get_default_ontology_resolver():
    """Test that get_default_ontology_resolver builds the documented default.

    The default is expected to be an ``RDFLibOntologyResolver`` with no
    ontology file and a ``FuzzyMatchingStrategy``.
    """
    # Import from the module that defines the helper; the old
    # get_ontology_resolver module no longer exists.
    from cognee.modules.ontology.get_default_ontology_resolver import (
        get_default_ontology_resolver,
    )
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    # The helper returns a resolver instance directly (not a Config dict).
    resolver = get_default_ontology_resolver()

    assert isinstance(resolver, RDFLibOntologyResolver)
    assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
    # The default is constructed with ontology_file=None.
    assert resolver.ontology_file is None
def test_rdflib_ontology_resolver_uses_matching_strategy():

View file

@ -5,8 +5,8 @@ import cognee
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.shared.logging_utils import setup_logging
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
text_1 = """
1. Audi
@ -62,12 +62,14 @@ async def main():
os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
)
# Create ontology config with custom ontology file
ontology_config = get_ontology_resolver(
resolver=RDFLibOntologyResolver(ontology_file=ontology_path)
)
# Create full config structure manually
config: Config = {
"ontology_config": {
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
await cognee.cognify(ontology_config=ontology_config)
await cognee.cognify(config=config)
print("Knowledge with ontology created.")
# Step 4: Query insights

View file

@ -5,8 +5,8 @@ import os
import textwrap
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
async def run_pipeline(ontology_path=None):
@ -19,11 +19,13 @@ async def run_pipeline(ontology_path=None):
await cognee.add(scientific_papers_dir)
ontology_config = get_ontology_resolver(
resolver=RDFLibOntologyResolver(ontology_file=ontology_path)
)
config: Config = {
"ontology_config": {
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
pipeline_run = await cognee.cognify(ontology_config=ontology_config)
pipeline_run = await cognee.cognify(config=config)
return pipeline_run