diff --git a/cognee/tests/unit/eval_framework/benchmark_adapters_test.py b/cognee/tests/unit/eval_framework/benchmark_adapters_test.py index 34bf0d22a..6a99b8101 100644 --- a/cognee/tests/unit/eval_framework/benchmark_adapters_test.py +++ b/cognee/tests/unit/eval_framework/benchmark_adapters_test.py @@ -3,6 +3,13 @@ from evals.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAd from evals.eval_framework.benchmark_adapters.musique_adapter import MusiqueQAAdapter from evals.eval_framework.benchmark_adapters.dummy_adapter import DummyAdapter from evals.eval_framework.benchmark_adapters.twowikimultihop_adapter import TwoWikiMultihopAdapter +from unittest.mock import patch, mock_open + + +MOCK_JSONL_DATA = """\ +{"id": "1", "question": "What is AI?", "answer": "Artificial Intelligence", "paragraphs": [{"paragraph_text": "AI is a field of computer science."}]} +{"id": "2", "question": "What is ML?", "answer": "Machine Learning", "paragraphs": [{"paragraph_text": "ML is a subset of AI."}]} +""" ADAPTER_CLASSES = [ @@ -19,8 +26,18 @@ def test_adapter_can_instantiate_and_load(AdapterClass): Basic smoke test: instantiate each adapter, call load_corpus with no limit, and ensure it returns the expected tuple of (list, list). """ - adapter = AdapterClass() - result = adapter.load_corpus() + if AdapterClass == MusiqueQAAdapter: + with ( + patch.object(MusiqueQAAdapter, "_musique_download_file"), + patch("builtins.open", new_callable=mock_open, read_data=MOCK_JSONL_DATA), + patch("os.path.exists", return_value=True), + ): + adapter = AdapterClass() + result = adapter.load_corpus() + + else: + adapter = AdapterClass() + result = adapter.load_corpus() assert isinstance(result, tuple), f"{AdapterClass.__name__} did not return a tuple." assert len(result) == 2, f"{AdapterClass.__name__} returned tuple of length != 2." @@ -38,32 +55,26 @@ def test_adapter_returns_some_content(AdapterClass): Verify that the adapter returns some data and that each QA dict at least has a 'question' and 'answer' key (you can extend or remove as needed). """ - adapter = AdapterClass() + limit = 3 + if AdapterClass == MusiqueQAAdapter: + with ( + patch.object(MusiqueQAAdapter, "_musique_download_file"), + patch("builtins.open", new_callable=mock_open, read_data=MOCK_JSONL_DATA), + patch("os.path.exists", return_value=True), + ): + adapter = AdapterClass() + corpus_list, qa_pairs = adapter.load_corpus(limit=limit) + else: + adapter = AdapterClass() + corpus_list, qa_pairs = adapter.load_corpus(limit=limit) - corpus_list, qa_pairs = adapter.load_corpus(limit=3) # small limit # We don't know how large the dataset is, but we expect at least 1 item assert len(corpus_list) > 0, f"{AdapterClass.__name__} returned an empty corpus_list." assert len(qa_pairs) > 0, f"{AdapterClass.__name__} returned an empty question_answer_pairs." + assert len(qa_pairs) <= limit, ( + f"{AdapterClass.__name__} returned more QA items than requested limit={limit}." + ) for item in qa_pairs: assert "question" in item, f"{AdapterClass.__name__} missing 'question' key in QA pair." assert "answer" in item, f"{AdapterClass.__name__} missing 'answer' key in QA pair." - - -@pytest.mark.parametrize("AdapterClass", ADAPTER_CLASSES) -def test_adapter_limit(AdapterClass): - """ - Check that the `limit` parameter correctly restricts the amount of data returned. - We'll test with limit=5. - """ - adapter = AdapterClass() - - limit = 5 - corpus_list, qa_pairs = adapter.load_corpus(limit=limit) - - # Confirm that we didn't receive more than 'limit' - # (Some adapters might be allowed to return fewer if the dataset is small) - - assert len(qa_pairs) <= limit, ( - f"{AdapterClass.__name__} returned more QA items than requested limit={limit}." - )