From 400095d76df23c33b7c4783654d381255459d0a4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:30:13 +0200 Subject: [PATCH 1/6] fix: Resolve issue with multi-user mode search --- cognee/api/v1/search/search.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 9f158e9d0..4051bae86 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -179,13 +179,6 @@ async def search( if not datasets: raise DatasetNotFoundError(message="No datasets found.") - graph_engine = await get_graph_engine() - is_empty = await graph_engine.is_empty() - - if is_empty: - logger.warning("Search attempt on an empty knowledge graph") - return [] - filtered_search_results = await search_function( query_text=query_text, query_type=query_type, From f88277c467e81f3d63b0e2f713be3d06c3c19276 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 23:10:53 +0200 Subject: [PATCH 2/6] fix: Resolve issue with plain text files not having magic file info --- cognee/infrastructure/files/utils/guess_file_type.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index edd2d89b0..dcdd68cad 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type: """ file_type = filetype.guess(file) + # If file type could not be determined consider it a plain text file as they don't have magic number encoding + if file_type is None: + from filetype.types.base import Type + + file_type = Type("text/plain", "txt") + if file_type is None: raise FileTypeException(f"Unknown file detected: {file.name}.") From 8c627d9e10df49d8c2315592b664081fab45e486 Mon Sep 17 00:00:00 2001 From: Hande <159312713+hande-k@users.noreply.github.com> Date: Mon, 20 Oct 2025 
12:03:40 +0200 Subject: [PATCH 3/6] chore: update colab notebook on README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a1eebae73..305bffdfe 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext ## Get Started -Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo +Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo ## About cognee @@ -224,12 +224,12 @@ We now have a paper you can cite: ```bibtex @misc{markovic2025optimizinginterfaceknowledgegraphs, - title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning}, + title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning}, author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic}, year={2025}, eprint={2505.24478}, archivePrefix={arXiv}, primaryClass={cs.AI}, - url={https://arxiv.org/abs/2505.24478}, + url={https://arxiv.org/abs/2505.24478}, } ``` From 279d6e80f03420838ae9e4ca81648563290d4d36 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Mon, 20 Oct 2025 11:56:15 +0100 Subject: [PATCH 4/6] Revert "fix: search without prior cognify" --- cognee/api/v1/search/search.py | 4 ---- .../databases/graph/graph_db_interface.py | 5 ---- .../databases/graph/kuzu/adapter.py | 9 -------- .../databases/graph/neo4j_driver/adapter.py | 9 -------- cognee/tests/test_kuzu.py | 23 ++++--------------- cognee/tests/test_neo4j.py | 22 ++++-------------- cognee/tests/unit/api/test_search.py | 21 ----------------- 7 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 cognee/tests/unit/api/test_search.py diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 4051bae86..0a9e76e96 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -1,7 +1,6 @@ from uuid import UUID 
from typing import Union, Optional, List, Type -from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.users.models import User from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult @@ -9,9 +8,6 @@ from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.exceptions import DatasetNotFoundError -from cognee.shared.logging_utils import get_logger - -logger = get_logger() async def search( diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 67df1a27c..65afdf275 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -159,11 +159,6 @@ class GraphDBInterface(ABC): - get_connections """ - @abstractmethod - async def is_empty(self) -> bool: - logger.warning("is_empty() is not implemented") - return True - @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: """ diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 2d3866888..3f0fb0c57 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -198,15 +198,6 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") - async def is_empty(self) -> bool: - query = """ - MATCH (n) - RETURN true - LIMIT 1; - """ - query_result = await self.query(query) - return len(query_result) == 0 - async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: """ Execute a Kuzu query asynchronously with automatic reconnection. 
diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 5861b69cb..520295ed2 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,15 +87,6 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield session - async def is_empty(self) -> bool: - query = """ - RETURN EXISTS { - MATCH (n) - } AS node_exists; - """ - query_result = await self.query(query) - return not query_result[0]["node_exists"] - @deadlock_retry() async def query( self, diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index fe9da6dcb..8749e42d0 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -47,26 +47,10 @@ async def main(): pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" ) - from cognee.infrastructure.databases.graph import get_graph_engine - - graph_engine = await get_graph_engine() - - is_empty = await graph_engine.is_empty() - - assert is_empty, "Kuzu graph database is not empty" - await cognee.add([explanation_file_path_quantum], dataset_name) - is_empty = await graph_engine.is_empty() - - assert is_empty, "Kuzu graph database should be empty before cognify" - await cognee.cognify([dataset_name]) - is_empty = await graph_engine.is_empty() - - assert not is_empty, "Kuzu graph database should not be empty" - from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -130,10 +114,11 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) + from cognee.infrastructure.databases.graph import get_graph_engine - is_empty = await graph_engine.is_empty() - - assert is_empty, "Kuzu graph database is not empty" + graph_engine = await get_graph_engine() + 
nodes, edges = await graph_engine.get_graph_data() + assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 925614e67..c74b4ab65 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -35,14 +35,6 @@ async def main(): explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - from cognee.infrastructure.databases.graph import get_graph_engine - - graph_engine = await get_graph_engine() - - is_empty = await graph_engine.is_empty() - - assert is_empty, "Graph has to be empty" - await cognee.add([explanation_file_path_nlp], dataset_name) explanation_file_path_quantum = os.path.join( @@ -50,16 +42,9 @@ async def main(): ) await cognee.add([explanation_file_path_quantum], dataset_name) - is_empty = await graph_engine.is_empty() - - assert is_empty, "Graph has to be empty before cognify" await cognee.cognify([dataset_name]) - is_empty = await graph_engine.is_empty() - - assert not is_empty, "Graph shouldn't be empty" - from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -132,8 +117,11 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - is_empty = await graph_engine.is_empty() - assert is_empty, "Neo4j graph database is not empty" + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + nodes, edges = await graph_engine.get_graph_data() + assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty" if __name__ == "__main__": diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py deleted file mode 100644 index 54a4cc35f..000000000 --- a/cognee/tests/unit/api/test_search.py +++ /dev/null @@ 
-1,21 +0,0 @@ -import pytest -import cognee - - -@pytest.mark.asyncio -async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph(): - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await cognee.add("Sample input") - result = await cognee.search("Sample query") - assert result == [] - - -@pytest.mark.asyncio -async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError(): - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await cognee.add("Sample input") - await cognee.cognify() - result = await cognee.search("Sample query") - assert result != [] From 3e54b67b4d7f20c385afad0bc878943df9a0b86c Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 20 Oct 2025 15:03:35 +0200 Subject: [PATCH 5/6] fix: Resolve missing argument for distributed (#1563) ## Description Resolve missing argument for distributed ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in 
every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com> --- cognee/modules/pipelines/operations/run_tasks_distributed.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognee/modules/pipelines/operations/run_tasks_distributed.py b/cognee/modules/pipelines/operations/run_tasks_distributed.py index 95cdb0266..3fce3763d 100644 --- a/cognee/modules/pipelines/operations/run_tasks_distributed.py +++ b/cognee/modules/pipelines/operations/run_tasks_distributed.py @@ -88,6 +88,7 @@ async def run_tasks_distributed( pipeline_name: str = "unknown_pipeline", context: dict = None, incremental_loading: bool = False, + data_per_batch: int = 20, ): if not user: user = await get_default_user() From df038365c848775229e1c9255d56992352b1990e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 20 Oct 2025 17:27:49 +0200 Subject: [PATCH 6/6] fix: fixes id in get_filtered_graph_data (#1569) ## Description Fixes get_filtered_graph_data method in neo4jAdapter. 
## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 520295ed2..365d02979 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -1067,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface): query_nodes = f""" MATCH (n) WHERE {where_clause} - RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties + RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties """ result_nodes = await self.query(query_nodes) @@ -1082,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface): query_edges = f""" MATCH (n)-[r]->(m) WHERE {where_clause} AND {where_clause.replace("n.", "m.")} - RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties + RETURN n.id AS source, m.id AS target, TYPE(r) AS type, properties(r) AS properties """ result_edges = await self.query(query_edges)