From 7c33418ae973fad68ac6ac3bda4ef88d15372bbb Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:24:15 +0200 Subject: [PATCH] chore: adds and updates docstrings --- .../utils/expand_with_nodes_and_edges.py | 2 +- cognee/tasks/graph/extract_graph_from_data.py | 25 ++++++++++++++++--- .../tasks/graph/extract_graph_from_data_v2.py | 18 ++++++++++--- .../modules/ontology/test_ontology_adapter.py | 10 ++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index ef72cd0e1..5b603c163 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -298,7 +298,7 @@ def expand_with_nodes_and_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) extracted from the chunk content. - ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and + ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and types against an ontology. If None, a default RDFLibOntologyResolver is created. Defaults to None. existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 7c049546c..5c3b11821 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -6,7 +6,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points from cognee.modules.ontology.ontology_config import Config from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, @@ -26,9 +26,28 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_resolver: RDFLibOntologyResolver, + ontology_resolver: BaseOntologyResolver, ) -> List[DocumentChunk]: - """Updates DocumentChunk objects, integrates data points and edges into databases.""" + """Integrate chunk graphs with ontology validation and store in databases. + + This function processes document chunks and their associated knowledge graphs, + validates entities against an ontology resolver, and stores the integrated + data points and edges in the configured databases. + + Args: + data_chunks: List of document chunks containing source data + chunk_graphs: List of knowledge graphs corresponding to each chunk + graph_model: Pydantic model class for graph data validation + ontology_resolver: Resolver for validating entities against ontology + + Returns: + List of updated DocumentChunk objects with integrated data + + Raises: + InvalidChunkGraphInputError: If input validation fails + InvalidGraphModelError: If graph model validation fails + InvalidOntologyAdapterError: If ontology resolver validation fails + """ if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list): raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.") diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index 5a4194fb1..0a8869784 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, @@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs async def extract_graph_from_data( data_chunks: List[DocumentChunk], n_rounds: int = 2, - ontology_adapter: RDFLibOntologyResolver = None, + ontology_adapter: BaseOntologyResolver = None, ) -> List[DocumentChunk]: - """Extract and update graph data from document chunks in multiple steps.""" + """Extract and update graph data from document chunks using cascade extraction. + + This function performs multi-step graph extraction from document chunks, + using cascade extraction techniques to build comprehensive knowledge graphs. + + Args: + data_chunks: List of document chunks to process + n_rounds: Number of extraction rounds to perform (default: 2) + ontology_adapter: Resolver for validating entities against ontology + + Returns: + List of updated DocumentChunk objects with extracted graph data + """ chunk_nodes = await asyncio.gather( *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks] ) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index d40f1369a..4757e2595 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -6,7 +6,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import get_default_on def test_ontology_adapter_initialization_success(): - """Test successful initialization of OntologyAdapter.""" + """Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver.""" adapter = get_default_ontology_resolver() adapter.build_lookup() @@ -104,7 +104,7 @@ def test_find_closest_match_no_match(): def test_get_subgraph_no_match_rdflib(): - """Test get_subgraph returns empty results for a non-existent node.""" + """Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver.""" g = Graph() resolver = get_default_ontology_resolver() @@ -162,7 +162,7 @@ def test_get_subgraph_success_rdflib(): def test_refresh_lookup_rdflib(): - """Test that refresh_lookup rebuilds the lookup dict into a new object.""" + """Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver.""" g = Graph() resolver = get_default_ontology_resolver() @@ -283,7 +283,7 @@ def test_ontology_config_structure(): def test_get_ontology_resolver_default(): - """Test get_default_ontology_resolver returns default resolver.""" + """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy @@ -294,7 +294,7 @@ def test_get_ontology_resolver_default(): def test_get_default_ontology_resolver(): - """Test get_default_ontology_resolver returns default resolver.""" + """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy.""" from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy