Test: Mock file download and open in musique adapter (#571)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Tests**
  - Enhanced test coverage to improve adapter instantiation and data loading reliability.
  - Updated mock testing logic to ensure robust content handling.
  - Removed an outdated test focused on data limit validation.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
parent
97db017708
commit
28f92f661e
1 changed file with 34 additions and 23 deletions
|
|
@@ -3,6 +3,13 @@ from evals.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAd
|
||||||
from evals.eval_framework.benchmark_adapters.musique_adapter import MusiqueQAAdapter
|
from evals.eval_framework.benchmark_adapters.musique_adapter import MusiqueQAAdapter
|
||||||
from evals.eval_framework.benchmark_adapters.dummy_adapter import DummyAdapter
|
from evals.eval_framework.benchmark_adapters.dummy_adapter import DummyAdapter
|
||||||
from evals.eval_framework.benchmark_adapters.twowikimultihop_adapter import TwoWikiMultihopAdapter
|
from evals.eval_framework.benchmark_adapters.twowikimultihop_adapter import TwoWikiMultihopAdapter
|
||||||
|
from unittest.mock import patch, mock_open
|
||||||
|
|
||||||
|
|
||||||
|
MOCK_JSONL_DATA = """\
|
||||||
|
{"id": "1", "question": "What is AI?", "answer": "Artificial Intelligence", "paragraphs": [{"paragraph_text": "AI is a field of computer science."}]}
|
||||||
|
{"id": "2", "question": "What is ML?", "answer": "Machine Learning", "paragraphs": [{"paragraph_text": "ML is a subset of AI."}]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
ADAPTER_CLASSES = [
|
ADAPTER_CLASSES = [
|
||||||
|
|
@@ -19,8 +26,18 @@ def test_adapter_can_instantiate_and_load(AdapterClass):
|
||||||
Basic smoke test: instantiate each adapter, call load_corpus with no limit,
|
Basic smoke test: instantiate each adapter, call load_corpus with no limit,
|
||||||
and ensure it returns the expected tuple of (list, list).
|
and ensure it returns the expected tuple of (list, list).
|
||||||
"""
|
"""
|
||||||
adapter = AdapterClass()
|
if AdapterClass == MusiqueQAAdapter:
|
||||||
result = adapter.load_corpus()
|
with (
|
||||||
|
patch.object(MusiqueQAAdapter, "_musique_download_file"),
|
||||||
|
patch("builtins.open", new_callable=mock_open, read_data=MOCK_JSONL_DATA),
|
||||||
|
patch("os.path.exists", return_value=True),
|
||||||
|
):
|
||||||
|
adapter = AdapterClass()
|
||||||
|
result = adapter.load_corpus()
|
||||||
|
|
||||||
|
else:
|
||||||
|
adapter = AdapterClass()
|
||||||
|
result = adapter.load_corpus()
|
||||||
|
|
||||||
assert isinstance(result, tuple), f"{AdapterClass.__name__} did not return a tuple."
|
assert isinstance(result, tuple), f"{AdapterClass.__name__} did not return a tuple."
|
||||||
assert len(result) == 2, f"{AdapterClass.__name__} returned tuple of length != 2."
|
assert len(result) == 2, f"{AdapterClass.__name__} returned tuple of length != 2."
|
||||||
|
|
@@ -38,32 +55,26 @@ def test_adapter_returns_some_content(AdapterClass):
|
||||||
Verify that the adapter returns some data and that each QA dict
|
Verify that the adapter returns some data and that each QA dict
|
||||||
at least has a 'question' and 'answer' key (you can extend or remove as needed).
|
at least has a 'question' and 'answer' key (you can extend or remove as needed).
|
||||||
"""
|
"""
|
||||||
adapter = AdapterClass()
|
limit = 3
|
||||||
|
if AdapterClass == MusiqueQAAdapter:
|
||||||
|
with (
|
||||||
|
patch.object(MusiqueQAAdapter, "_musique_download_file"),
|
||||||
|
patch("builtins.open", new_callable=mock_open, read_data=MOCK_JSONL_DATA),
|
||||||
|
patch("os.path.exists", return_value=True),
|
||||||
|
):
|
||||||
|
adapter = AdapterClass()
|
||||||
|
corpus_list, qa_pairs = adapter.load_corpus(limit=limit)
|
||||||
|
else:
|
||||||
|
adapter = AdapterClass()
|
||||||
|
corpus_list, qa_pairs = adapter.load_corpus(limit=limit)
|
||||||
|
|
||||||
corpus_list, qa_pairs = adapter.load_corpus(limit=3) # small limit
|
|
||||||
# We don't know how large the dataset is, but we expect at least 1 item
|
# We don't know how large the dataset is, but we expect at least 1 item
|
||||||
assert len(corpus_list) > 0, f"{AdapterClass.__name__} returned an empty corpus_list."
|
assert len(corpus_list) > 0, f"{AdapterClass.__name__} returned an empty corpus_list."
|
||||||
assert len(qa_pairs) > 0, f"{AdapterClass.__name__} returned an empty question_answer_pairs."
|
assert len(qa_pairs) > 0, f"{AdapterClass.__name__} returned an empty question_answer_pairs."
|
||||||
|
assert len(qa_pairs) <= limit, (
|
||||||
|
f"{AdapterClass.__name__} returned more QA items than requested limit={limit}."
|
||||||
|
)
|
||||||
|
|
||||||
for item in qa_pairs:
|
for item in qa_pairs:
|
||||||
assert "question" in item, f"{AdapterClass.__name__} missing 'question' key in QA pair."
|
assert "question" in item, f"{AdapterClass.__name__} missing 'question' key in QA pair."
|
||||||
assert "answer" in item, f"{AdapterClass.__name__} missing 'answer' key in QA pair."
|
assert "answer" in item, f"{AdapterClass.__name__} missing 'answer' key in QA pair."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("AdapterClass", ADAPTER_CLASSES)
|
|
||||||
def test_adapter_limit(AdapterClass):
|
|
||||||
"""
|
|
||||||
Check that the `limit` parameter correctly restricts the amount of data returned.
|
|
||||||
We'll test with limit=5.
|
|
||||||
"""
|
|
||||||
adapter = AdapterClass()
|
|
||||||
|
|
||||||
limit = 5
|
|
||||||
corpus_list, qa_pairs = adapter.load_corpus(limit=limit)
|
|
||||||
|
|
||||||
# Confirm that we didn't receive more than 'limit'
|
|
||||||
# (Some adapters might be allowed to return fewer if the dataset is small)
|
|
||||||
|
|
||||||
assert len(qa_pairs) <= limit, (
|
|
||||||
f"{AdapterClass.__name__} returned more QA items than requested limit={limit}."
|
|
||||||
)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue