test: add retriever tests

2025-11-05 17:29:40 +01:00 · 2025-11-05 17:29:40 +01:00 · 215ef7f3c2
commit 215ef7f3c2
parent 33b0516381
6 changed files with 271 additions and 6 deletions
--- a/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py
+++ b/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py
@ -0,0 +1,65 @@
 import os
 import pytest
 import pathlib
 from pydantic import BaseModel
 import cognee
 from cognee.low_level import setup
 from cognee.tasks.storage import add_data_points
 from cognee.modules.engine.models import Entity, EntityType
 from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever
 from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor
 from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider
 class TestAnswer(BaseModel):
    # Structured response schema: passed as `response_model` to `get_completion`
    # in the structured-completion test of this file.
    # NOTE(review): the `Test` name prefix may make pytest try to collect this
    # class — confirm whether a collection warning is emitted.
    answer: str  # asserted to be non-blank by the test
    explanation: str  # asserted to be non-blank by the test
 # TODO: Add more tests, similar to other retrievers.
 # TODO: For the tests, one needs to define an Entity Extractor and a Context Provider.
 class TestEntityCompletionRetriever:
    """Tests for EntityCompletionRetriever built from the dummy extractor and provider."""
    @pytest.mark.asyncio
    async def test_get_entity_structured_completion(self):
        """Check default (string) and structured (TestAnswer) completions.

        Points cognee at per-test system/data directories next to this file,
        prunes them, stores a single Entity, then calls `get_completion` once
        without and once with `response_model=TestAnswer`.
        """
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_get_entity_structured_completion"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_get_entity_structured_completion"
        )
        cognee.config.data_root_directory(data_directory_path)
        # Start from a clean state so earlier runs cannot leak into this test.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        entity_type = EntityType(name="Person", description="A human individual")
        entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist")
        await add_data_points([entity])
        retriever = EntityCompletionRetriever(DummyEntityExtractor(), DummyContextProvider())
        query = "Who is Albert Einstein?"
        # Test with string response model (default)
        string_answer = await retriever.get_completion(query)
        assert isinstance(string_answer, list), (
            f"Expected list, got {type(string_answer).__name__}"
        )
        # `all()` is vacuously true on an empty list, so check non-emptiness explicitly.
        assert string_answer, "Answer list should not be empty"
        assert all(isinstance(item, str) and item.strip() for item in string_answer), (
            "Answer should not be empty"
        )
        # Test with structured response model
        structured_answer = await retriever.get_completion(query, response_model=TestAnswer)
        assert isinstance(structured_answer, list), (
            f"Expected list, got {type(structured_answer).__name__}"
        )
        # Fail with a readable message instead of an IndexError on `[0]` below.
        assert structured_answer, "Structured answer list should not be empty"
        item_types = [type(item).__name__ for item in structured_answer]
        assert all(isinstance(item, TestAnswer) for item in structured_answer), (
            f"Expected TestAnswer items, got {item_types}"
        )
        assert structured_answer[0].answer.strip(), "Answer field should not be empty"
        assert structured_answer[0].explanation.strip(), "Explanation field should not be empty"
--- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py
+++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py
@ -183,15 +183,15 @@ class TestGraphCompletionWithContextExtensionRetriever:
        )
    @pytest.mark.asyncio
-    async def test_get_structured_completion_extension_context(self):
+    async def test_get_graph_structured_completion_extension_context(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent,
-            ".cognee_system/test_get_structured_completion_extension_context",
+            ".cognee_system/test_get_graph_structured_completion_extension_context",
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent,
-            ".data_storage/test_get_structured_completion_extension_context",
+            ".data_storage/test_get_graph_structured_completion_extension_context",
        )
        cognee.config.data_root_directory(data_directory_path)
--- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py
+++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py
@ -176,13 +176,13 @@ class TestGraphCompletionCoTRetriever:
        )
    @pytest.mark.asyncio
-    async def test_get_structured_completion(self):
+    async def test_get_graph_structured_completion_cot(self):
        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion"
+            pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion_cot"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_get_structured_completion"
+            pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion_cot"
        )
        cognee.config.data_root_directory(data_directory_path)
--- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py
+++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py
@ -2,6 +2,7 @@ import os
 import pytest
 import pathlib
 from typing import Optional, Union
 from pydantic import BaseModel
 import cognee
 from cognee.low_level import setup, DataPoint
@ -10,6 +11,11 @@ from cognee.tasks.storage import add_data_points
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 class TestAnswer(BaseModel):
    # Structured response schema: passed as `response_model` to
    # `GraphCompletionRetriever.get_completion` in the structured-completion test.
    # NOTE(review): the `Test` name prefix may make pytest try to collect this
    # class — confirm whether a collection warning is emitted.
    answer: str  # asserted to be non-blank by the test
    explanation: str  # asserted to be non-blank by the test
 class TestGraphCompletionRetriever:
    @pytest.mark.asyncio
    async def test_graph_completion_context_simple(self):
@ -221,3 +227,54 @@ class TestGraphCompletionRetriever:
        context = await retriever.get_context("Who works at Figma?")
        assert context == [], "Context should be empty on an empty graph"
    @pytest.mark.asyncio
    async def test_get_graph_structured_completion(self):
        """Check default (string) and structured (TestAnswer) completions on a small graph.

        Points cognee at per-test system/data directories next to this file,
        prunes them, stores one Company and one Person, then calls
        `get_completion` once without and once with `response_model=TestAnswer`.
        """
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion"
        )
        cognee.config.data_root_directory(data_directory_path)
        # Start from a clean state so earlier runs cannot leak into this test.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        class Company(DataPoint):
            name: str
        class Person(DataPoint):
            name: str
            works_for: Company
        company1 = Company(name="Figma")
        person1 = Person(name="Steve Rodger", works_for=company1)
        await add_data_points([company1, person1])
        retriever = GraphCompletionRetriever()
        query = "Who works at Figma?"
        # Test with string response model (default)
        string_answer = await retriever.get_completion(query)
        assert isinstance(string_answer, list), (
            f"Expected list, got {type(string_answer).__name__}"
        )
        # `all()` is vacuously true on an empty list, so check non-emptiness explicitly.
        assert string_answer, "Answer list should not be empty"
        assert all(isinstance(item, str) and item.strip() for item in string_answer), (
            "Answer should not be empty"
        )
        # Test with structured response model
        structured_answer = await retriever.get_completion(query, response_model=TestAnswer)
        assert isinstance(structured_answer, list), (
            f"Expected list, got {type(structured_answer).__name__}"
        )
        # Fail with a readable message instead of an IndexError on `[0]` below.
        assert structured_answer, "Structured answer list should not be empty"
        item_types = [type(item).__name__ for item in structured_answer]
        assert all(isinstance(item, TestAnswer) for item in structured_answer), (
            f"Expected TestAnswer items, got {item_types}"
        )
        assert structured_answer[0].answer.strip(), "Answer field should not be empty"
        assert structured_answer[0].explanation.strip(), "Explanation field should not be empty"
--- a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py
+++ b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py
@ -3,6 +3,7 @@ from typing import List
 import pytest
 import pathlib
 import cognee
 from pydantic import BaseModel
 from cognee.low_level import setup
 from cognee.tasks.storage import add_data_points
 from cognee.infrastructure.databases.vector import get_vector_engine
@ -26,6 +27,11 @@ class DocumentChunkWithEntities(DataPoint):
    metadata: dict = {"index_fields": ["text"]}
 class TestAnswer(BaseModel):
    # Structured response schema: passed as `response_model` to
    # `CompletionRetriever.get_completion` in the structured-completion test.
    # NOTE(review): the `Test` name prefix may make pytest try to collect this
    # class — confirm whether a collection warning is emitted.
    answer: str  # asserted to be non-blank by the test
    explanation: str  # asserted to be non-blank by the test
 class TestRAGCompletionRetriever:
    @pytest.mark.asyncio
    async def test_rag_completion_context_simple(self):
@ -202,3 +208,76 @@ class TestRAGCompletionRetriever:
        context = await retriever.get_context("Christina Mayer")
        assert context == "", "Returned context should be empty on an empty graph"
    @pytest.mark.asyncio
    async def test_get_rag_structured_completion(self):
        """Check default (string) and structured (TestAnswer) completions over stored chunks.

        Points cognee at per-test system/data directories next to this file,
        prunes them, stores three DocumentChunks of one TextDocument, then calls
        `get_completion` once without and once with `response_model=TestAnswer`.
        """
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_get_rag_structured_completion"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_get_rag_structured_completion"
        )
        cognee.config.data_root_directory(data_directory_path)
        # Start from a clean state so earlier runs cannot leak into this test.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        document = TextDocument(
            name="Steve Rodger's career",
            raw_data_location="somewhere",
            external_metadata="",
            mime_type="text/plain",
        )
        # One chunk per name; chunk_index follows the position in this list.
        chunks = [
            DocumentChunk(
                text=text,
                chunk_size=2,
                chunk_index=index,
                cut_type="sentence_end",
                is_part_of=document,
                contains=[],
            )
            for index, text in enumerate(["Steve Rodger", "Mike Broski", "Christina Mayer"])
        ]
        await add_data_points(chunks)
        retriever = CompletionRetriever()
        query = "Where does Steve work?"
        # Test with string response model (default)
        string_answer = await retriever.get_completion(query)
        assert isinstance(string_answer, list), (
            f"Expected list, got {type(string_answer).__name__}"
        )
        # `all()` is vacuously true on an empty list, so check non-emptiness explicitly.
        assert string_answer, "Answer list should not be empty"
        assert all(isinstance(item, str) and item.strip() for item in string_answer), (
            "Answer should not be empty"
        )
        # Test with structured response model
        structured_answer = await retriever.get_completion(query, response_model=TestAnswer)
        assert isinstance(structured_answer, list), (
            f"Expected list, got {type(structured_answer).__name__}"
        )
        # Fail with a readable message instead of an IndexError on `[0]` below.
        assert structured_answer, "Structured answer list should not be empty"
        item_types = [type(item).__name__ for item in structured_answer]
        assert all(isinstance(item, TestAnswer) for item in structured_answer), (
            f"Expected TestAnswer items, got {item_types}"
        )
        assert structured_answer[0].answer.strip(), "Answer field should not be empty"
        assert structured_answer[0].explanation.strip(), "Explanation field should not be empty"
--- a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py
+++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py
@ -1,7 +1,13 @@
 import asyncio
 import os
 import pathlib
 import cognee
 from types import SimpleNamespace
 import pytest
 from pydantic import BaseModel
 from cognee.low_level import setup, DataPoint
 from cognee.tasks.storage import add_data_points
 from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
@ -141,6 +147,64 @@ async def test_filter_top_k_events_error_handling():
        await tr.filter_top_k_events([{}], [])
 class TestAnswer(BaseModel):
    # Structured response schema: passed as `response_model` to
    # `TemporalRetriever.get_completion` in the structured-completion test.
    # NOTE(review): the `Test` name prefix may make pytest try to collect this
    # class — confirm whether a collection warning is emitted.
    answer: str  # asserted to be non-blank by the test
    explanation: str  # asserted to be non-blank by the test
@pytest.mark.asyncio
 async def test_get_temporal_structured_completion():
    """Check default (string) and structured (TestAnswer) completions from TemporalRetriever.

    Points cognee at per-test system/data directories next to this file,
    prunes them, stores one Company and one Person (with a `works_since` year),
    then calls `get_completion` once without and once with
    `response_model=TestAnswer`.
    """
    system_directory_path = os.path.join(
        pathlib.Path(__file__).parent, ".cognee_system/test_get_temporal_structured_completion"
    )
    cognee.config.system_root_directory(system_directory_path)
    data_directory_path = os.path.join(
        pathlib.Path(__file__).parent, ".data_storage/test_get_temporal_structured_completion"
    )
    cognee.config.data_root_directory(data_directory_path)
    # Start from a clean state so earlier runs cannot leak into this test.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()
    class Company(DataPoint):
        name: str
    class Person(DataPoint):
        name: str
        works_for: Company
        works_since: int
    company1 = Company(name="Figma")
    person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015)
    await add_data_points([company1, person1])
    retriever = TemporalRetriever()
    # Same query for both calls (the original second call had a stray extra "?").
    query = "When did Steve start working at Figma?"
    # Test with string response model (default)
    string_answer = await retriever.get_completion(query)
    assert isinstance(string_answer, list), f"Expected list, got {type(string_answer).__name__}"
    # `all()` is vacuously true on an empty list, so check non-emptiness explicitly.
    assert string_answer, "Answer list should not be empty"
    assert all(isinstance(item, str) and item.strip() for item in string_answer), (
        "Answer should not be empty"
    )
    # Test with structured response model
    structured_answer = await retriever.get_completion(query, response_model=TestAnswer)
    assert isinstance(structured_answer, list), (
        f"Expected list, got {type(structured_answer).__name__}"
    )
    # Fail with a readable message instead of an IndexError on `[0]` below.
    assert structured_answer, "Structured answer list should not be empty"
    item_types = [type(item).__name__ for item in structured_answer]
    assert all(isinstance(item, TestAnswer) for item in structured_answer), (
        f"Expected TestAnswer items, got {item_types}"
    )
    assert structured_answer[0].answer.strip(), "Answer field should not be empty"
    assert structured_answer[0].explanation.strip(), "Explanation field should not be empty"
 class _FakeRetriever(TemporalRetriever):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)