From fc89f71e7ca14ae807186eacc63a773acd06949c Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 12 Dec 2025 15:57:49 +0100 Subject: [PATCH] fix: fixes hotpot and twowiki tests (that are using url to download dataset) --- .../eval_framework/benchmark_adapters_test.py | 25 +++++++++++++ .../eval_framework/corpus_builder_test.py | 37 +++++++++++++++++-- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/cognee/tests/unit/eval_framework/benchmark_adapters_test.py b/cognee/tests/unit/eval_framework/benchmark_adapters_test.py index 70ec43cf8..b18012594 100644 --- a/cognee/tests/unit/eval_framework/benchmark_adapters_test.py +++ b/cognee/tests/unit/eval_framework/benchmark_adapters_test.py @@ -11,6 +11,22 @@ MOCK_JSONL_DATA = """\ {"id": "2", "question": "What is ML?", "answer": "Machine Learning", "paragraphs": [{"paragraph_text": "ML is a subset of AI."}]} """ +MOCK_HOTPOT_CORPUS = [ + { + "_id": "1", + "question": "Next to which country is Germany located?", + "answer": "Netherlands", + # HotpotQA uses "level"; TwoWikiMultiHop uses "type". + "level": "easy", + "type": "comparison", + "context": [ + ["Germany", ["Germany is in Europe."]], + ["Netherlands", ["The Netherlands borders Germany."]], + ], + "supporting_facts": [["Netherlands", 0]], + } +] + ADAPTER_CLASSES = [ HotpotQAAdapter, @@ -35,6 +51,11 @@ def test_adapter_can_instantiate_and_load(AdapterClass): adapter = AdapterClass() result = adapter.load_corpus() + elif AdapterClass in (HotpotQAAdapter, TwoWikiMultihopAdapter): + with patch.object(AdapterClass, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS): + adapter = AdapterClass() + result = adapter.load_corpus() + else: adapter = AdapterClass() result = adapter.load_corpus() @@ -64,6 +85,10 @@ def test_adapter_returns_some_content(AdapterClass): ): adapter = AdapterClass() corpus_list, qa_pairs = adapter.load_corpus(limit=limit) + elif AdapterClass in (HotpotQAAdapter, TwoWikiMultihopAdapter): + with patch.object(AdapterClass, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS): + adapter = AdapterClass() + corpus_list, qa_pairs = adapter.load_corpus(limit=limit) else: adapter = AdapterClass() corpus_list, qa_pairs = adapter.load_corpus(limit=limit) diff --git a/cognee/tests/unit/eval_framework/corpus_builder_test.py b/cognee/tests/unit/eval_framework/corpus_builder_test.py index 14136bea5..53f886b58 100644 --- a/cognee/tests/unit/eval_framework/corpus_builder_test.py +++ b/cognee/tests/unit/eval_framework/corpus_builder_test.py @@ -2,15 +2,38 @@ import pytest from cognee.eval_framework.corpus_builder.corpus_builder_executor import CorpusBuilderExecutor from cognee.infrastructure.databases.graph import get_graph_engine from unittest.mock import AsyncMock, patch +from cognee.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAdapter benchmark_options = ["HotPotQA", "Dummy", "TwoWikiMultiHop"] +MOCK_HOTPOT_CORPUS = [ + { + "_id": "1", + "question": "Next to which country is Germany located?", + "answer": "Netherlands", + # HotpotQA uses "level"; TwoWikiMultiHop uses "type". + "level": "easy", + "type": "comparison", + "context": [ + ["Germany", ["Germany is in Europe."]], + ["Netherlands", ["The Netherlands borders Germany."]], + ], + "supporting_facts": [["Netherlands", 0]], + } +] + @pytest.mark.parametrize("benchmark", benchmark_options) def test_corpus_builder_load_corpus(benchmark): limit = 2 - corpus_builder = CorpusBuilderExecutor(benchmark, "Default") - raw_corpus, questions = corpus_builder.load_corpus(limit=limit) + if benchmark in ("HotPotQA", "TwoWikiMultiHop"): + with patch.object(HotpotQAAdapter, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS): + corpus_builder = CorpusBuilderExecutor(benchmark, "Default") + raw_corpus, questions = corpus_builder.load_corpus(limit=limit) + else: + corpus_builder = CorpusBuilderExecutor(benchmark, "Default") + raw_corpus, questions = corpus_builder.load_corpus(limit=limit) + assert len(raw_corpus) > 0, f"Corpus builder loads empty corpus for {benchmark}" assert len(questions) <= 2, ( f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}" @@ -22,8 +45,14 @@ def test_corpus_builder_load_corpus(benchmark): @patch.object(CorpusBuilderExecutor, "run_cognee", new_callable=AsyncMock) async def test_corpus_builder_build_corpus(mock_run_cognee, benchmark): limit = 2 - corpus_builder = CorpusBuilderExecutor(benchmark, "Default") - questions = await corpus_builder.build_corpus(limit=limit) + if benchmark in ("HotPotQA", "TwoWikiMultiHop"): + with patch.object(HotpotQAAdapter, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS): + corpus_builder = CorpusBuilderExecutor(benchmark, "Default") + questions = await corpus_builder.build_corpus(limit=limit) + else: + corpus_builder = CorpusBuilderExecutor(benchmark, "Default") + questions = await corpus_builder.build_corpus(limit=limit) + assert len(questions) <= 2, ( f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}" )