From 74bc7c942054c3203ad20639ef3c08c7d59aea5f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 29 Sep 2025 21:22:21 +0200 Subject: [PATCH 1/8] refactor: set node_set to None for endpoint --- cognee/api/v1/add/routers/get_add_router.py | 6 +++++- cognee/api/v1/update/routers/get_update_router.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index dfa7d275b..4d0063cc9 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -73,7 +73,11 @@ def get_add_router() -> APIRouter: try: add_run = await cognee_add( - data, datasetName, user=user, dataset_id=datasetId, node_set=node_set + data, + datasetName, + user=user, + dataset_id=datasetId, + node_set=node_set if node_set else None, ) if isinstance(add_run, PipelineRunErrored): diff --git a/cognee/api/v1/update/routers/get_update_router.py b/cognee/api/v1/update/routers/get_update_router.py index 144bdd9e6..4101e1e31 100644 --- a/cognee/api/v1/update/routers/get_update_router.py +++ b/cognee/api/v1/update/routers/get_update_router.py @@ -75,7 +75,7 @@ def get_update_router() -> APIRouter: data=data, dataset_id=dataset_id, user=user, - node_set=node_set, + node_set=node_set if node_set else None, ) # If any cognify run errored return JSONResponse with proper error status code From 7ab000d891fa6ed6fbe6804050456ea55fad28ce Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 30 Sep 2025 18:12:22 +0200 Subject: [PATCH 2/8] refactor: Add test for updating of docs and visualization --- cognee/__init__.py | 1 + cognee/api/v1/update/update.py | 2 +- cognee/tests/test_library.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/cognee/__init__.py b/cognee/__init__.py index 27dea1ad5..6e4d2a903 100644 --- a/cognee/__init__.py +++ b/cognee/__init__.py @@ -19,6 +19,7 @@ from .api.v1.add import add from .api.v1.delete 
import delete from .api.v1.cognify import cognify from .modules.memify import memify +from .api.v1.update import update from .api.v1.config.config import config from .api.v1.datasets.datasets import datasets from .api.v1.prune import prune diff --git a/cognee/api/v1/update/update.py b/cognee/api/v1/update/update.py index b4b1f5e5a..a421b3dc0 100644 --- a/cognee/api/v1/update/update.py +++ b/cognee/api/v1/update/update.py @@ -10,9 +10,9 @@ from cognee.api.v1.cognify import cognify async def update( data_id: UUID, data: Union[BinaryIO, list[BinaryIO], str, list[str]], + dataset_id: UUID, user: User = None, node_set: Optional[List[str]] = None, - dataset_id: Optional[UUID] = None, vector_db_config: dict = None, graph_db_config: dict = None, preferred_loaders: List[str] = None, diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index c5e6cc64b..33eb64f29 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -6,6 +6,7 @@ from cognee.modules.search.operations import get_history from cognee.modules.users.methods import get_default_user from cognee.shared.logging_utils import get_logger from cognee.modules.search.types import SearchType +from cognee import update logger = get_logger() @@ -42,7 +43,7 @@ async def main(): await cognee.add([text], dataset_name) - await cognee.cognify([dataset_name]) + cognify_run_info = await cognee.cognify([dataset_name]) from cognee.infrastructure.databases.vector import get_vector_engine @@ -77,6 +78,35 @@ async def main(): assert len(history) == 6, "Search history is not correct." 
+ # Test updating of documents + # Get Pipeline Run object + pipeline_run_obj = list(cognify_run_info.values())[0] + for data_item in pipeline_run_obj.data_ingestion_info: + # Update all documents in dataset to only contain Mark and Cindy information + await update( + dataset_id=pipeline_run_obj.dataset_id, + data_id=data_item["data_id"], + data="Mark met with Cindy at a cafe.", + ) + + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?" + ) + assert "Mark" in search_results[0], ( + "Failed to update document, no mention of Mark in search results" + ) + assert "Cindy" in search_results[0], ( + "Failed to update document, no mention of Cindy in search results" + ) + assert "Artificial intelligence" not in search_results[0], ( + "Failed to update document, Artificial intelligence still mentioned in search results" + ) + + # Test visualization + from cognee import visualize_graph + + await visualize_graph() + # Assert local data files are cleaned properly await cognee.prune.prune_data() data_root_directory = get_storage_config()["data_root_directory"] From 5b46f86be5c8a5c3065f3daa1bec2d5d52d56eb6 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 1 Oct 2025 17:59:53 +0200 Subject: [PATCH 3/8] test: Removed long text string about quantum computers from tests. Used a file instead. 
--- cognee/tests/test_chromadb.py | 16 +++++--------- cognee/tests/test_deduplication.py | 8 +------ cognee/tests/test_kuzu.py | 17 ++++++-------- cognee/tests/test_lancedb.py | 16 +++++--------- cognee/tests/test_memgraph.py | 16 +++++--------- cognee/tests/test_neo4j.py | 16 +++++--------- cognee/tests/test_neptune_analytics_vector.py | 16 +++++--------- cognee/tests/test_permissions.py | 22 ++++++++----------- cognee/tests/test_pgvector.py | 18 ++++++--------- cognee/tests/test_remote_kuzu.py | 17 ++++++-------- cognee/tests/test_search_db.py | 14 +++++------- 11 files changed, 67 insertions(+), 109 deletions(-) diff --git a/cognee/tests/test_chromadb.py b/cognee/tests/test_chromadb.py index 01e1938d7..b663f674b 100644 --- a/cognee/tests/test_chromadb.py +++ b/cognee/tests/test_chromadb.py @@ -133,20 +133,16 @@ async def main(): dataset_name_1 = "natural_language" dataset_name_2 = "quantum" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name_1) + await cognee.add([explanation_file_path_nlp], dataset_name_1) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name_2) + await cognee.add([explanation_file_path_quantum], dataset_name_2) await cognee.cognify([dataset_name_2, dataset_name_1]) diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py index bef813317..e799f36fc 100644 --- a/cognee/tests/test_deduplication.py +++ b/cognee/tests/test_deduplication.py @@ -49,13 +49,7 @@ async def test_deduplication(): await cognee.prune.prune_system(metadata=True) # Test deduplication of text input - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + text = os.path.join(pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt") await cognee.add([text], dataset_name) await cognee.add([text], dataset_name2) diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index 6afd4540a..349f58fc4 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -38,19 +38,16 @@ async def main(): dataset_name = "cs_explanations" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name) + await cognee.add([explanation_file_path_nlp], dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ - await cognee.add([text], dataset_name) + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) + + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) diff --git a/cognee/tests/test_lancedb.py b/cognee/tests/test_lancedb.py index ea3b455cd..7a2bffe4d 100644 --- a/cognee/tests/test_lancedb.py +++ b/cognee/tests/test_lancedb.py @@ -131,20 +131,16 @@ async def main(): dataset_name_1 = "natural_language" dataset_name_2 = "quantum" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name_1) + await cognee.add([explanation_file_path_nlp], dataset_name_1) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name_2) + await cognee.add([explanation_file_path_quantum], dataset_name_2) await cognee.cognify([dataset_name_2, dataset_name_1]) diff --git a/cognee/tests/test_memgraph.py b/cognee/tests/test_memgraph.py index b21cf3735..e8ae6d6be 100644 --- a/cognee/tests/test_memgraph.py +++ b/cognee/tests/test_memgraph.py @@ -32,20 +32,16 @@ async def main(): dataset_name = "cs_explanations" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name) + await cognee.add([explanation_file_path_nlp], dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name) + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 7f24e8418..6a7a7565c 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -32,20 +32,16 @@ async def main(): dataset_name = "cs_explanations" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name) + await cognee.add([explanation_file_path_nlp], dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name) + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) diff --git a/cognee/tests/test_neptune_analytics_vector.py b/cognee/tests/test_neptune_analytics_vector.py index eececacdd..072c7d724 100644 --- a/cognee/tests/test_neptune_analytics_vector.py +++ b/cognee/tests/test_neptune_analytics_vector.py @@ -38,20 +38,16 @@ async def main(): dataset_name = "cs_explanations" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name) + await cognee.add([explanation_file_path_nlp], dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name) + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) diff --git a/cognee/tests/test_permissions.py b/cognee/tests/test_permissions.py index 95f769263..3c8a4606a 100644 --- a/cognee/tests/test_permissions.py +++ b/cognee/tests/test_permissions.py @@ -34,25 +34,21 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) # Add document for default user - await cognee.add([explanation_file_path], dataset_name="NLP") + await cognee.add([explanation_file_path_nlp], dataset_name="NLP") default_user = await get_default_user() - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. 
- Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. 
Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) # Add document for test user test_user = await create_user("user@example.com", "example") - await cognee.add([text], dataset_name="QUANTUM", user=test_user) + await cognee.add([explanation_file_path_quantum], dataset_name="QUANTUM", user=test_user) nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user) quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=test_user) @@ -101,7 +97,7 @@ async def main(): add_error = False try: await cognee.add( - [explanation_file_path], + [explanation_file_path_nlp], dataset_name="QUANTUM", dataset_id=test_user_dataset_id, user=default_user, @@ -143,7 +139,7 @@ async def main(): # Add new data to test_users dataset from default_user await cognee.add( - [explanation_file_path], + [explanation_file_path_nlp], dataset_name="QUANTUM", dataset_id=test_user_dataset_id, user=default_user, @@ -216,7 +212,7 @@ async def main(): ) # Try deleting data from test_user dataset with default_user after getting delete permission - # Get the dataset data to find the ID of the remaining data item (explanation_file_path) + # Get the dataset 
data to find the ID of the remaining data item (explanation_file_path_nlp) test_user_dataset_data = await get_dataset_data(test_user_dataset_id) explanation_file_data_id = test_user_dataset_data[0].id diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index 059cf691a..cc78c536a 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -141,20 +141,16 @@ async def main(): dataset_name_1 = "natural_language" dataset_name_2 = "quantum" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name_1) + await cognee.add([explanation_file_path_nlp], dataset_name_1) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. 
Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. 
- """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name_2) + await cognee.add([explanation_file_path_quantum], dataset_name_2) await cognee.cognify([dataset_name_2, dataset_name_1]) @@ -202,7 +198,7 @@ async def main(): history = await get_history(user.id) assert len(history) == 8, "Search history is not correct." - await test_local_file_deletion(text, explanation_file_path) + await test_local_file_deletion(explanation_file_path_quantum, explanation_file_path_nlp) await cognee.prune.prune_data() data_root_directory = get_storage_config()["data_root_directory"] diff --git a/cognee/tests/test_remote_kuzu.py b/cognee/tests/test_remote_kuzu.py index bbff48096..e0c3b223c 100644 --- a/cognee/tests/test_remote_kuzu.py +++ b/cognee/tests/test_remote_kuzu.py @@ -42,19 +42,16 @@ async def main(): dataset_name = "cs_explanations" - explanation_file_path = os.path.join( + explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) - await cognee.add([explanation_file_path], dataset_name) + await cognee.add([explanation_file_path_nlp], dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ - await cognee.add([text], dataset_name) + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) + + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index cb4636470..e24abd0f5 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -1,3 +1,5 @@ +import pathlib +import os import cognee from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -27,15 +29,11 @@ async def main(): text_1 = """Germany is located in europe right next to the Netherlands""" await cognee.add(text_1, dataset_name) - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. 
Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ + explanation_file_path_quantum = os.path.join( + pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" + ) - await cognee.add([text], dataset_name) + await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.cognify([dataset_name]) From 6f0756f312b4e46be72150b331104be837eb9a03 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 1 Oct 2025 18:10:57 +0200 Subject: [PATCH 4/8] test: Rollback deduplication test --- cognee/tests/test_deduplication.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py index e799f36fc..bef813317 100644 --- a/cognee/tests/test_deduplication.py +++ b/cognee/tests/test_deduplication.py @@ -49,7 +49,13 @@ async def test_deduplication(): await cognee.prune.prune_system(metadata=True) # Test deduplication of text input - text = os.path.join(pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt") + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. 
+ Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. + The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. + Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. + In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. 
Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. + """ await cognee.add([text], dataset_name) await cognee.add([text], dataset_name2) From a744f8d43519891245a1aae91630e39aadbc00ee Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 2 Oct 2025 09:54:30 +0200 Subject: [PATCH 5/8] test: Rollback pgvector test. Was failing for some reason. --- cognee/tests/test_pgvector.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index cc78c536a..f3c191da3 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -146,11 +146,15 @@ async def main(): ) await cognee.add([explanation_file_path_nlp], dataset_name_1) - explanation_file_path_quantum = os.path.join( - pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" - ) + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. 
+ Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. + The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. + Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. + In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. 
Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. + """ - await cognee.add([explanation_file_path_quantum], dataset_name_2) + await cognee.add([text], dataset_name_2) await cognee.cognify([dataset_name_2, dataset_name_1]) @@ -198,7 +202,7 @@ async def main(): history = await get_history(user.id) assert len(history) == 8, "Search history is not correct." 
- await test_local_file_deletion(explanation_file_path_quantum, explanation_file_path_nlp) + await test_local_file_deletion(text, explanation_file_path_nlp) await cognee.prune.prune_data() data_root_directory = get_storage_config()["data_root_directory"] From 63a1463073227429e729f33df82ef23427b38fdc Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Tue, 7 Oct 2025 16:03:12 +0100 Subject: [PATCH 6/8] Deprecate `SearchType.INSIGHTS`, replace all references to default search type - `SearchType.GRAPH_COMPLETION` --- cognee-mcp/src/server.py | 2 +- cognee/api/v1/cognify/cognify.py | 2 +- .../modules/retrieval/insights_retriever.py | 133 ---------- .../search/methods/get_search_type_tools.py | 5 - cognee/modules/search/types/SearchType.py | 1 - cognee/tests/test_chromadb.py | 2 +- cognee/tests/test_kuzu.py | 2 +- cognee/tests/test_lancedb.py | 2 +- cognee/tests/test_library.py | 2 +- cognee/tests/test_memgraph.py | 2 +- cognee/tests/test_neo4j.py | 2 +- cognee/tests/test_neptune_analytics_vector.py | 2 +- cognee/tests/test_pgvector.py | 2 +- cognee/tests/test_remote_kuzu.py | 2 +- cognee/tests/test_s3_file_storage.py | 2 +- .../retrieval/insights_retriever_test.py | 251 ------------------ evals/src/qa/qa_benchmark_cognee.py | 2 +- .../database_examples/chromadb_example.py | 4 +- examples/database_examples/kuzu_example.py | 4 +- examples/database_examples/neo4j_example.py | 4 +- .../neptune_analytics_example.py | 2 +- .../database_examples/pgvector_example.py | 4 +- examples/python/simple_example.py | 4 +- notebooks/cognee_demo.ipynb | 2 +- notebooks/neptune-analytics-example.ipynb | 2 +- 25 files changed, 31 insertions(+), 411 deletions(-) delete mode 100644 cognee/modules/retrieval/insights_retriever.py delete mode 100644 cognee/tests/unit/modules/retrieval/insights_retriever_test.py diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index cc6eac09e..1eceae07b 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -255,7 +255,7 @@ 
async def cognify( # 2. Get entity relationships and connections relationships = await cognee.search( "connections between concepts", - query_type=SearchType.INSIGHTS + query_type=SearchType.GRAPH_COMPLETION ) # 3. Find relevant document chunks diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 1292d243a..c3045f00a 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -148,7 +148,7 @@ async def cognify( # 2. Get entity relationships and connections relationships = await cognee.search( "connections between concepts", - query_type=SearchType.INSIGHTS + query_type=SearchType.GRAPH_COMPLETION ) # 3. Find relevant document chunks diff --git a/cognee/modules/retrieval/insights_retriever.py b/cognee/modules/retrieval/insights_retriever.py deleted file mode 100644 index 10aa288e6..000000000 --- a/cognee/modules/retrieval/insights_retriever.py +++ /dev/null @@ -1,133 +0,0 @@ -import asyncio -from typing import Any, Optional - -from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node -from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever -from cognee.shared.logging_utils import get_logger -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.vector import get_vector_engine -from cognee.modules.retrieval.exceptions.exceptions import NoDataError -from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError - -logger = get_logger("InsightsRetriever") - - -class InsightsRetriever(BaseGraphRetriever): - """ - Retriever for handling graph connection-based insights. 
- - Public methods include: - - get_context - - get_completion - - Instance variables include: - - exploration_levels - - top_k - """ - - def __init__(self, exploration_levels: int = 1, top_k: Optional[int] = 5): - """Initialize retriever with exploration levels and search parameters.""" - self.exploration_levels = exploration_levels - self.top_k = top_k - - async def get_context(self, query: str) -> list: - """ - Find neighbours of a given node in the graph. - - If the provided query does not correspond to an existing node, - search for similar entities and retrieve their connections. - Reraises NoDataError if there is no data found in the system. - - Parameters: - ----------- - - - query (str): A string identifier for the node whose neighbours are to be - retrieved. - - Returns: - -------- - - - list: A list of unique connections found for the queried node. - """ - if query is None: - return [] - - node_id = query - graph_engine = await get_graph_engine() - exact_node = await graph_engine.extract_node(node_id) - - if exact_node is not None and "id" in exact_node: - node_connections = await graph_engine.get_connections(str(exact_node["id"])) - else: - vector_engine = get_vector_engine() - - try: - results = await asyncio.gather( - vector_engine.search("Entity_name", query_text=query, limit=self.top_k), - vector_engine.search("EntityType_name", query_text=query, limit=self.top_k), - ) - except CollectionNotFoundError as error: - logger.error("Entity collections not found") - raise NoDataError("No data found in the system, please add data first.") from error - - results = [*results[0], *results[1]] - relevant_results = [result for result in results if result.score < 0.5][: self.top_k] - - if len(relevant_results) == 0: - return [] - - node_connections_results = await asyncio.gather( - *[graph_engine.get_connections(result.id) for result in relevant_results] - ) - - node_connections = [] - for neighbours in node_connections_results: - 
node_connections.extend(neighbours) - - unique_node_connections_map = {} - unique_node_connections = [] - - for node_connection in node_connections: - if "id" not in node_connection[0] or "id" not in node_connection[2]: - continue - - unique_id = f"{node_connection[0]['id']} {node_connection[1]['relationship_name']} {node_connection[2]['id']}" - if unique_id not in unique_node_connections_map: - unique_node_connections_map[unique_id] = True - unique_node_connections.append(node_connection) - - return unique_node_connections - # return [ - # Edge( - # node1=Node(node_id=connection[0]["id"], attributes=connection[0]), - # node2=Node(node_id=connection[2]["id"], attributes=connection[2]), - # attributes={ - # **connection[1], - # "relationship_type": connection[1]["relationship_name"], - # }, - # ) - # for connection in unique_node_connections - # ] - - async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: - """ - Returns the graph connections context. - - If a context is not provided, it fetches the context using the query provided. - - Parameters: - ----------- - - - query (str): A string identifier used to fetch the context. - - context (Optional[Any]): An optional context to use for the completion; if None, - it fetches the context based on the query. (default None) - - Returns: - -------- - - - Any: The context used for the completion, which is either provided or fetched - based on the query. 
- """ - if context is None: - context = await self.get_context(query) - return context diff --git a/cognee/modules/search/methods/get_search_type_tools.py b/cognee/modules/search/methods/get_search_type_tools.py index 9cf67785e..72e2db89a 100644 --- a/cognee/modules/search/methods/get_search_type_tools.py +++ b/cognee/modules/search/methods/get_search_type_tools.py @@ -9,7 +9,6 @@ from cognee.modules.search.exceptions import UnsupportedSearchTypeError # Retrievers from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback from cognee.modules.retrieval.chunks_retriever import ChunksRetriever -from cognee.modules.retrieval.insights_retriever import InsightsRetriever from cognee.modules.retrieval.summaries_retriever import SummariesRetriever from cognee.modules.retrieval.completion_retriever import CompletionRetriever from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever @@ -44,10 +43,6 @@ async def get_search_type_tools( SummariesRetriever(top_k=top_k).get_completion, SummariesRetriever(top_k=top_k).get_context, ], - SearchType.INSIGHTS: [ - InsightsRetriever(top_k=top_k).get_completion, - InsightsRetriever(top_k=top_k).get_context, - ], SearchType.CHUNKS: [ ChunksRetriever(top_k=top_k).get_completion, ChunksRetriever(top_k=top_k).get_context, diff --git a/cognee/modules/search/types/SearchType.py b/cognee/modules/search/types/SearchType.py index 418aec0b5..4536c0e01 100644 --- a/cognee/modules/search/types/SearchType.py +++ b/cognee/modules/search/types/SearchType.py @@ -3,7 +3,6 @@ from enum import Enum class SearchType(Enum): SUMMARIES = "SUMMARIES" - INSIGHTS = "INSIGHTS" CHUNKS = "CHUNKS" RAG_COMPLETION = "RAG_COMPLETION" GRAPH_COMPLETION = "GRAPH_COMPLETION" diff --git a/cognee/tests/test_chromadb.py b/cognee/tests/test_chromadb.py index 01e1938d7..f7d82a042 100644 --- a/cognee/tests/test_chromadb.py +++ b/cognee/tests/test_chromadb.py @@ -159,7 +159,7 @@ async def main(): random_node_name = 
random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index 6afd4540a..76a79f2b2 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -61,7 +61,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_lancedb.py b/cognee/tests/test_lancedb.py index ea3b455cd..1e667dcde 100644 --- a/cognee/tests/test_lancedb.py +++ b/cognee/tests/test_lancedb.py @@ -157,7 +157,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index c5e6cc64b..fa621ff12 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -51,7 +51,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." 
print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_memgraph.py b/cognee/tests/test_memgraph.py index b21cf3735..9e2e94b36 100644 --- a/cognee/tests/test_memgraph.py +++ b/cognee/tests/test_memgraph.py @@ -56,7 +56,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 7f24e8418..90427baf2 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -56,7 +56,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_neptune_analytics_vector.py b/cognee/tests/test_neptune_analytics_vector.py index eececacdd..be868537f 100644 --- a/cognee/tests/test_neptune_analytics_vector.py +++ b/cognee/tests/test_neptune_analytics_vector.py @@ -60,7 +60,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." 
print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index 059cf691a..11ed9bf2e 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -167,7 +167,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_remote_kuzu.py b/cognee/tests/test_remote_kuzu.py index bbff48096..2c3b054a6 100644 --- a/cognee/tests/test_remote_kuzu.py +++ b/cognee/tests/test_remote_kuzu.py @@ -65,7 +65,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/test_s3_file_storage.py b/cognee/tests/test_s3_file_storage.py index 3a20b232c..c7fc62cf2 100755 --- a/cognee/tests/test_s3_file_storage.py +++ b/cognee/tests/test_s3_file_storage.py @@ -47,7 +47,7 @@ async def main(): random_node_name = random_node.payload["text"] search_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text=random_node_name + query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name ) assert len(search_results) != 0, "The search results list is empty." 
print("\n\nExtracted sentences are:\n") diff --git a/cognee/tests/unit/modules/retrieval/insights_retriever_test.py b/cognee/tests/unit/modules/retrieval/insights_retriever_test.py deleted file mode 100644 index 21dbc98dd..000000000 --- a/cognee/tests/unit/modules/retrieval/insights_retriever_test.py +++ /dev/null @@ -1,251 +0,0 @@ -import os -import pytest -import pathlib - -import cognee -from cognee.low_level import setup -from cognee.tasks.storage import add_data_points -from cognee.modules.engine.models import Entity, EntityType -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.vector import get_vector_engine -from cognee.modules.retrieval.exceptions.exceptions import NoDataError -from cognee.modules.retrieval.insights_retriever import InsightsRetriever - - -class TestInsightsRetriever: - @pytest.mark.asyncio - async def test_insights_context_simple(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_simple" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_insights_context_simple" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - entityTypePerson = EntityType( - name="Person", - description="An individual", - ) - - person1 = Entity( - name="Steve Rodger", - is_a=entityTypePerson, - description="An American actor, comedian, and filmmaker", - ) - - person2 = Entity( - name="Mike Broski", - is_a=entityTypePerson, - description="Financial advisor and philanthropist", - ) - - person3 = Entity( - name="Christina Mayer", - is_a=entityTypePerson, - description="Maker of next generation of iconic American music videos", - ) - - entityTypeCompany = EntityType( - name="Company", - description="An organization that operates on an 
annual basis", - ) - - company1 = Entity( - name="Apple", - is_a=entityTypeCompany, - description="An American multinational technology company headquartered in Cupertino, California", - ) - - company2 = Entity( - name="Google", - is_a=entityTypeCompany, - description="An American multinational technology company that specializes in Internet-related services and products", - ) - - company3 = Entity( - name="Facebook", - is_a=entityTypeCompany, - description="An American social media, messaging, and online platform", - ) - - entities = [person1, person2, person3, company1, company2, company3] - - await add_data_points(entities) - - retriever = InsightsRetriever() - - context = await retriever.get_context("Mike") - - assert context[0][0]["name"] == "Mike Broski", "Failed to get Mike Broski" - - @pytest.mark.asyncio - async def test_insights_context_complex(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_complex" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_insights_context_complex" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - entityTypePerson = EntityType( - name="Person", - description="An individual", - ) - - person1 = Entity( - name="Steve Rodger", - is_a=entityTypePerson, - description="An American actor, comedian, and filmmaker", - ) - - person2 = Entity( - name="Mike Broski", - is_a=entityTypePerson, - description="Financial advisor and philanthropist", - ) - - person3 = Entity( - name="Christina Mayer", - is_a=entityTypePerson, - description="Maker of next generation of iconic American music videos", - ) - - person4 = Entity( - name="Jason Statham", - is_a=entityTypePerson, - description="An American actor", - ) - - person5 = Entity( - name="Mike Tyson", - 
is_a=entityTypePerson, - description="A former professional boxer from the United States", - ) - - entityTypeCompany = EntityType( - name="Company", - description="An organization that operates on an annual basis", - ) - - company1 = Entity( - name="Apple", - is_a=entityTypeCompany, - description="An American multinational technology company headquartered in Cupertino, California", - ) - - company2 = Entity( - name="Google", - is_a=entityTypeCompany, - description="An American multinational technology company that specializes in Internet-related services and products", - ) - - company3 = Entity( - name="Facebook", - is_a=entityTypeCompany, - description="An American social media, messaging, and online platform", - ) - - entities = [person1, person2, person3, company1, company2, company3] - - await add_data_points(entities) - - graph_engine = await get_graph_engine() - - await graph_engine.add_edges( - [ - ( - (str)(person1.id), - (str)(company1.id), - "works_for", - dict( - relationship_name="works_for", - source_node_id=person1.id, - target_node_id=company1.id, - ), - ), - ( - (str)(person2.id), - (str)(company2.id), - "works_for", - dict( - relationship_name="works_for", - source_node_id=person2.id, - target_node_id=company2.id, - ), - ), - ( - (str)(person3.id), - (str)(company3.id), - "works_for", - dict( - relationship_name="works_for", - source_node_id=person3.id, - target_node_id=company3.id, - ), - ), - ( - (str)(person4.id), - (str)(company1.id), - "works_for", - dict( - relationship_name="works_for", - source_node_id=person4.id, - target_node_id=company1.id, - ), - ), - ( - (str)(person5.id), - (str)(company1.id), - "works_for", - dict( - relationship_name="works_for", - source_node_id=person5.id, - target_node_id=company1.id, - ), - ), - ] - ) - - retriever = InsightsRetriever(top_k=20) - - context = await retriever.get_context("Christina") - - assert context[0][0]["name"] == "Christina Mayer", "Failed to get Christina Mayer" - - @pytest.mark.asyncio - 
async def test_insights_context_on_empty_graph(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_on_empty_graph" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_insights_context_on_empty_graph" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - - retriever = InsightsRetriever() - - with pytest.raises(NoDataError): - await retriever.get_context("Christina Mayer") - - vector_engine = get_vector_engine() - await vector_engine.create_collection("Entity_name", payload_schema=Entity) - await vector_engine.create_collection("EntityType_name", payload_schema=EntityType) - - context = await retriever.get_context("Christina Mayer") - assert context == [], "Returned context should be empty on an empty graph" diff --git a/evals/src/qa/qa_benchmark_cognee.py b/evals/src/qa/qa_benchmark_cognee.py index 2a09f250d..e4b2faf85 100644 --- a/evals/src/qa/qa_benchmark_cognee.py +++ b/evals/src/qa/qa_benchmark_cognee.py @@ -34,7 +34,7 @@ class CogneeConfig(QABenchmarkConfig): system_prompt_path: str = "answer_simple_question_benchmark2.txt" # Search parameters (fallback if not using eval framework) - search_type: SearchType = SearchType.INSIGHTS + search_type: SearchType = SearchType.GRAPH_COMPLETION # Clean slate on initialization clean_start: bool = True diff --git a/examples/database_examples/chromadb_example.py b/examples/database_examples/chromadb_example.py index defa7f78d..71005fb4f 100644 --- a/examples/database_examples/chromadb_example.py +++ b/examples/database_examples/chromadb_example.py @@ -57,7 +57,9 @@ async def main(): # Now let's perform some searches # 1. 
Search for insights related to "ChromaDB" - insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="ChromaDB") + insights_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="ChromaDB" + ) print("\nInsights about ChromaDB:") for result in insights_results: print(f"- {result}") diff --git a/examples/database_examples/kuzu_example.py b/examples/database_examples/kuzu_example.py index a31404cbc..ab24a0e07 100644 --- a/examples/database_examples/kuzu_example.py +++ b/examples/database_examples/kuzu_example.py @@ -55,7 +55,9 @@ async def main(): # Now let's perform some searches # 1. Search for insights related to "KuzuDB" - insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="KuzuDB") + insights_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="KuzuDB" + ) print("\nInsights about KuzuDB:") for result in insights_results: print(f"- {result}") diff --git a/examples/database_examples/neo4j_example.py b/examples/database_examples/neo4j_example.py index 45985610f..b5b6bfbbc 100644 --- a/examples/database_examples/neo4j_example.py +++ b/examples/database_examples/neo4j_example.py @@ -64,7 +64,9 @@ async def main(): # Now let's perform some searches # 1. Search for insights related to "Neo4j" - insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Neo4j") + insights_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="Neo4j" + ) print("\nInsights about Neo4j:") for result in insights_results: print(f"- {result}") diff --git a/examples/database_examples/neptune_analytics_example.py b/examples/database_examples/neptune_analytics_example.py index 5d36e2803..d98d1768c 100644 --- a/examples/database_examples/neptune_analytics_example.py +++ b/examples/database_examples/neptune_analytics_example.py @@ -79,7 +79,7 @@ async def main(): # Now let's perform some searches # 1. 
Search for insights related to "Neptune Analytics" insights_results = await cognee.search( - query_type=SearchType.INSIGHTS, query_text="Neptune Analytics" + query_type=SearchType.GRAPH_COMPLETION, query_text="Neptune Analytics" ) print("\n========Insights about Neptune Analytics========:") for result in insights_results: diff --git a/examples/database_examples/pgvector_example.py b/examples/database_examples/pgvector_example.py index 19dbb44e9..026cbfd08 100644 --- a/examples/database_examples/pgvector_example.py +++ b/examples/database_examples/pgvector_example.py @@ -69,7 +69,9 @@ async def main(): # Now let's perform some searches # 1. Search for insights related to "PGVector" - insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="PGVector") + insights_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text="PGVector" + ) print("\nInsights about PGVector:") for result in insights_results: print(f"- {result}") diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index 41be06f25..c13e48f85 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -50,7 +50,9 @@ async def main(): query_text = "Tell me about NLP" print(f"Searching cognee for insights with query: '{query_text}'") # Query cognee for insights on the added text - search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=query_text) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text=query_text + ) print("Search results:") # Display results diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb index 51eeab560..8983ccb09 100644 --- a/notebooks/cognee_demo.ipynb +++ b/notebooks/cognee_demo.ipynb @@ -1795,7 +1795,7 @@ } ], "source": [ - "search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=node_name)\n", + "search_results = await cognee.search(query_type=SearchType.GRAPH_COMPLETION, 
query_text=node_name)\n", "print(\"\\n\\nExtracted sentences are:\\n\")\n", "for result in search_results:\n", " print(f\"{result}\\n\")" diff --git a/notebooks/neptune-analytics-example.ipynb b/notebooks/neptune-analytics-example.ipynb index 197918db5..e80ea4dcb 100644 --- a/notebooks/neptune-analytics-example.ipynb +++ b/notebooks/neptune-analytics-example.ipynb @@ -295,7 +295,7 @@ "cell_type": "code", "source": [ "# Search graph insights\n", - "insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.INSIGHTS)\n", + "insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.GRAPH_COMPLETION)\n", "print(\"\\nInsights about Neptune Analytics:\")\n", "for result in insights_results:\n", " src_node = result[0].get(\"name\", result[0][\"type\"])\n", From 97f90c95ed6371631e598bdb76682b2cd6c2cace Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Tue, 7 Oct 2025 16:09:11 +0100 Subject: [PATCH 7/8] Remove all references to `SearchType.INSIGHTS` across the codebase, meaningfully replacing it with `SearchType.GRAPH_COMPLETION` where applicable. 
--- .../src/modules/chat/hooks/useChat.ts | 17 ----------------- cognee-mcp/README.md | 2 +- cognee-mcp/src/server.py | 11 ----------- cognee/api/v1/responses/default_tools.py | 1 - cognee/api/v1/responses/dispatch_function.py | 2 +- .../api/v1/responses/routers/default_tools.py | 1 - cognee/api/v1/search/search.py | 9 --------- cognee/cli/commands/search_command.py | 4 ---- cognee/cli/config.py | 1 - .../llm/prompts/search_type_selector_prompt.txt | 5 ----- .../utils/description_to_codepart_search.py | 2 +- .../cli_tests/cli_unit_tests/test_cli_utils.py | 1 - 12 files changed, 3 insertions(+), 53 deletions(-) diff --git a/cognee-frontend/src/modules/chat/hooks/useChat.ts b/cognee-frontend/src/modules/chat/hooks/useChat.ts index 613f9134f..ed5bc4d79 100644 --- a/cognee-frontend/src/modules/chat/hooks/useChat.ts +++ b/cognee-frontend/src/modules/chat/hooks/useChat.ts @@ -89,15 +89,6 @@ export default function useChat(dataset: Dataset) { } -interface Node { - name: string; -} - -interface Relationship { - relationship_name: string; -} - -type InsightMessage = [Node, Relationship, Node]; // eslint-disable-next-line @typescript-eslint/no-explicit-any function convertToSearchTypeOutput(systemMessage: any[] | any, searchType: string): string { @@ -106,14 +97,6 @@ function convertToSearchTypeOutput(systemMessage: any[] | any, searchType: strin } switch (searchType) { - case "INSIGHTS": - return systemMessage.map((message: InsightMessage) => { - const [node1, relationship, node2] = message; - if (node1.name && node2.name) { - return `${node1.name} ${relationship.relationship_name} ${node2.name}.`; - } - return ""; - }).join("\n"); case "SUMMARIES": return systemMessage.map((message: { text: string }) => message.text).join("\n"); case "CHUNKS": diff --git a/cognee-mcp/README.md b/cognee-mcp/README.md index 8d973725b..aa6442993 100644 --- a/cognee-mcp/README.md +++ b/cognee-mcp/README.md @@ -266,7 +266,7 @@ The MCP server exposes its functionality through tools. 
Call them from any MCP c - **codify**: Analyse a code repository, build a code graph, stores it in memory -- **search**: Query memory – supports GRAPH_COMPLETION, RAG_COMPLETION, CODE, CHUNKS, INSIGHTS +- **search**: Query memory – supports GRAPH_COMPLETION, RAG_COMPLETION, CODE, CHUNKS - **list_data**: List all datasets and their data items with IDs for deletion operations diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 1eceae07b..e7c82c99b 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -478,11 +478,6 @@ async def search(search_query: str, search_type: str) -> list: Best for: Direct document retrieval, specific fact-finding. Returns: LLM responses based on relevant text chunks. - **INSIGHTS**: - Structured entity relationships and semantic connections. - Best for: Understanding concept relationships, knowledge mapping. - Returns: Formatted relationship data and entity connections. - **CHUNKS**: Raw text segments that match the query semantically. Best for: Finding specific passages, citations, exact content. @@ -524,7 +519,6 @@ async def search(search_query: str, search_type: str) -> list: - "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data - "CODE": Returns code-related knowledge in JSON format - "CHUNKS": Returns raw text chunks from the knowledge graph - - "INSIGHTS": Returns relationships between nodes in readable format - "SUMMARIES": Returns pre-generated hierarchical summaries - "CYPHER": Direct graph database queries - "FEELING_LUCKY": Automatically selects best search type @@ -537,7 +531,6 @@ async def search(search_query: str, search_type: str) -> list: A list containing a single TextContent object with the search results. 
The format of the result depends on the search_type: - **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings - - **INSIGHTS**: Formatted relationship descriptions and entity connections - **CHUNKS**: Relevant text passages with source metadata - **SUMMARIES**: Hierarchical summaries from general to specific - **CODE**: Structured code information with context @@ -547,7 +540,6 @@ async def search(search_query: str, search_type: str) -> list: Performance & Optimization: - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal) - - **INSIGHTS**: Fast, returns structured relationships without LLM processing - **CHUNKS**: Fastest, pure vector similarity search without LLM - **SUMMARIES**: Fast, returns pre-computed summaries - **CODE**: Medium speed, specialized for code understanding @@ -586,9 +578,6 @@ async def search(search_query: str, search_type: str) -> list: return str(search_results[0]) elif search_type.upper() == "CHUNKS": return str(search_results) - elif search_type.upper() == "INSIGHTS": - results = retrieved_edges_to_string(search_results) - return results else: return str(search_results) diff --git a/cognee/api/v1/responses/default_tools.py b/cognee/api/v1/responses/default_tools.py index 295d132f1..f3d643df2 100644 --- a/cognee/api/v1/responses/default_tools.py +++ b/cognee/api/v1/responses/default_tools.py @@ -14,7 +14,6 @@ DEFAULT_TOOLS = [ "type": "string", "description": "Type of search to perform", "enum": [ - "INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE", diff --git a/cognee/api/v1/responses/dispatch_function.py b/cognee/api/v1/responses/dispatch_function.py index 85388b564..aea37c350 100644 --- a/cognee/api/v1/responses/dispatch_function.py +++ b/cognee/api/v1/responses/dispatch_function.py @@ -59,7 +59,7 @@ async def handle_search(arguments: Dict[str, Any], user) -> list: valid_search_types = ( 
search_tool["parameters"]["properties"]["search_type"]["enum"] if search_tool - else ["INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"] + else ["CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"] ) if search_type_str not in valid_search_types: diff --git a/cognee/api/v1/responses/routers/default_tools.py b/cognee/api/v1/responses/routers/default_tools.py index e43620a4b..4194e3376 100644 --- a/cognee/api/v1/responses/routers/default_tools.py +++ b/cognee/api/v1/responses/routers/default_tools.py @@ -14,7 +14,6 @@ DEFAULT_TOOLS = [ "type": "string", "description": "Type of search to perform", "enum": [ - "INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE", diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 7209c6036..0a9e76e96 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -52,11 +52,6 @@ async def search( Best for: Direct document retrieval, specific fact-finding. Returns: LLM responses based on relevant text chunks. - **INSIGHTS**: - Structured entity relationships and semantic connections. - Best for: Understanding concept relationships, knowledge mapping. - Returns: Formatted relationship data and entity connections. - **CHUNKS**: Raw text segments that match the query semantically. Best for: Finding specific passages, citations, exact content. 
@@ -124,9 +119,6 @@ async def search( **GRAPH_COMPLETION/RAG_COMPLETION**: [List of conversational AI response strings] - **INSIGHTS**: - [List of formatted relationship descriptions and entity connections] - **CHUNKS**: [List of relevant text passages with source metadata] @@ -146,7 +138,6 @@ async def search( Performance & Optimization: - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal) - - **INSIGHTS**: Fast, returns structured relationships without LLM processing - **CHUNKS**: Fastest, pure vector similarity search without LLM - **SUMMARIES**: Fast, returns pre-computed summaries - **CODE**: Medium speed, specialized for code understanding diff --git a/cognee/cli/commands/search_command.py b/cognee/cli/commands/search_command.py index 3540a833c..4c53cc260 100644 --- a/cognee/cli/commands/search_command.py +++ b/cognee/cli/commands/search_command.py @@ -31,10 +31,6 @@ Search Types & Use Cases: Traditional RAG using document chunks without graph structure. Best for: Direct document retrieval, specific fact-finding. -**INSIGHTS**: - Structured entity relationships and semantic connections. - Best for: Understanding concept relationships, knowledge mapping. - **CHUNKS**: Raw text segments that match the query semantically. Best for: Finding specific passages, citations, exact content. 
diff --git a/cognee/cli/config.py b/cognee/cli/config.py index 31e8693c7..d016608c1 100644 --- a/cognee/cli/config.py +++ b/cognee/cli/config.py @@ -19,7 +19,6 @@ COMMAND_DESCRIPTIONS = { SEARCH_TYPE_CHOICES = [ "GRAPH_COMPLETION", "RAG_COMPLETION", - "INSIGHTS", "CHUNKS", "SUMMARIES", "CODE", diff --git a/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt b/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt index 7ed2e72fc..1a00bce7e 100644 --- a/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +++ b/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt @@ -10,8 +10,6 @@ Here are the available `SearchType` tools and their specific functions: - Summarizing large amounts of information - Quick understanding of complex subjects -* **`INSIGHTS`**: The `INSIGHTS` search type discovers connections and relationships between entities in the knowledge graph. - **Best for:** - Discovering how entities are connected @@ -95,9 +93,6 @@ Here are the available `SearchType` tools and their specific functions: Query: "Summarize the key findings from these research papers" Response: `SUMMARIES` -Query: "What is the relationship between the methodologies used in these papers?" -Response: `INSIGHTS` - Query: "When was Einstein born?" 
Response: `CHUNKS` diff --git a/cognee/modules/retrieval/utils/description_to_codepart_search.py b/cognee/modules/retrieval/utils/description_to_codepart_search.py index a61feb574..649edb6ed 100644 --- a/cognee/modules/retrieval/utils/description_to_codepart_search.py +++ b/cognee/modules/retrieval/utils/description_to_codepart_search.py @@ -62,7 +62,7 @@ async def code_description_to_code_part( try: if include_docs: - search_results = await search(query_text=query, query_type="INSIGHTS") + search_results = await search(query_text=query, query_type="GRAPH_COMPLETION") concatenated_descriptions = " ".join( obj["description"] diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py index 00ee5f442..05de0c70b 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py @@ -53,7 +53,6 @@ class TestCliConfig: expected_types = [ "GRAPH_COMPLETION", "RAG_COMPLETION", - "INSIGHTS", "CHUNKS", "SUMMARIES", "CODE", From b75aa6f9ad43ec6af0515c5e650472acc70575f8 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 10 Oct 2025 12:59:28 +0200 Subject: [PATCH 8/8] refactor: Keep code DRY in health endpoint --- cognee/api/health.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/cognee/api/health.py b/cognee/api/health.py index 0241a0712..ac30f3bf4 100644 --- a/cognee/api/health.py +++ b/cognee/api/health.py @@ -241,16 +241,6 @@ class HealthChecker: """Get comprehensive health status.""" components = {} - # Critical services - critical_components = [ - "relational_db", - "vector_db", - "graph_db", - "file_storage", - "llm_provider", - "embedding_service", - ] - critical_checks = [ ("relational_db", self.check_relational_db()), ("vector_db", self.check_vector_db()), @@ -296,11 +286,11 @@ class HealthChecker: else: components[name] = result + critical_comps = [check[0] for check in critical_checks] # 
Determine overall status critical_unhealthy = any( - comp.status == HealthStatus.UNHEALTHY + comp.status == HealthStatus.UNHEALTHY and name in critical_comps for name, comp in components.items() - if name in critical_components ) has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())