cognee/level_3/rag_test_manager.py
2023-10-05 17:15:45 +02:00

106 lines
No EOL
3.9 KiB
Python

from deepeval.metrics.overall_score import OverallScoreMetric
from deepeval.test_case import LLMTestCase
from deepeval.run_test import assert_test, run_test
import uuid
def retrieve_test_cases():
    """Retrieve test cases from a database or a file.

    Not implemented yet: the body is a placeholder, so every call currently
    returns ``None``.
    """
    pass
def check_params(chunk_size, chunk_overlap, chunk_strategy, loader_strategy, query, context, metadata):
    """Collect the parameters of a test run into a dict keyed by parameter name.

    The result is a plain mapping so that it can be unpacked with ``**`` into
    a load call and indexed by ``'chunk_size'`` / ``'chunk_overlap'`` when
    variants are generated. Returning ``None`` here (as the bare placeholder
    did) makes both of those operations raise ``TypeError``.

    Args:
        chunk_size: Chunk size to use for the run.
        chunk_overlap: Overlap between consecutive chunks.
        chunk_strategy: Chunking strategy identifier.
        loader_strategy: Loader strategy identifier.
        query: Query associated with the run.
        context: Context associated with the run.
        metadata: Extra metadata associated with the run.

    Returns:
        A new dict holding the seven arguments unchanged, under their own
        names.
    """
    # TODO: validate the values and fill in defaults for missing ones; the
    # rules for that are not defined yet, so values pass through untouched.
    return {
        'chunk_size': chunk_size,
        'chunk_overlap': chunk_overlap,
        'chunk_strategy': chunk_strategy,
        'loader_strategy': loader_strategy,
        'query': query,
        'context': context,
        'metadata': metadata,
    }
def run_load(test_id, document, **kwargs):
    """Run load for the given test_id and document with other parameters.

    Not implemented yet: the body is a placeholder, so the arguments are
    ignored and every call currently returns ``None``.

    Args:
        test_id: Identifier of the test run.
        document: Document to load.
        **kwargs: Any further run parameters, passed by keyword.
    """
    pass
def compare_output(output, expected_output):
    """Compare the output against the expected output.

    Not implemented yet: the body is a placeholder, so no comparison is made
    and every call currently returns ``None``.

    Args:
        output: Output produced by a run.
        expected_output: Output the run is expected to produce.
    """
    pass
def generate_param_variants(base_params, *, increments=(1, 2, 3), keys=('chunk_size', 'chunk_overlap')):
    """Generate single-parameter variants of ``base_params`` for testing.

    Every variant is a one-key dict that overrides exactly one parameter with
    its base value plus an increment, so a caller can merge it over
    ``base_params`` to vary one setting at a time.

    Args:
        base_params: Mapping that holds a base value for every name in
            ``keys``; each value must support ``+`` with the increments.
        increments: Offsets added to each base value. The default
            ``(1, 2, 3)`` gives three variants per key.
        keys: Names of the parameters to vary. Pass more names here to cover
            additional parameters.

    Returns:
        A list of one-key dicts, grouped by key in the order of ``keys`` and,
        within each key, in the order of ``increments``. ``base_params`` is
        not modified.

    Raises:
        KeyError: If ``base_params`` is a dict lacking one of ``keys``.
    """
    return [
        {key: base_params[key] + step}
        for key in keys
        for step in increments
    ]
def run_tests_with_variants(document, base_params, param_variants, expected_output):
    """Run one load per parameter variant and compare each result.

    Args:
        document: Document handed to every load.
        base_params: Mapping of baseline run parameters.
        param_variants: Iterable of mappings; each one is merged over
            ``base_params`` for a single run.
        expected_output: Value every run's output is compared against.

    Returns:
        None. The value returned by ``compare_output`` is not collected.
    """
    for variant in param_variants:
        # Each variant runs under its own freshly generated id.
        test_id = str(uuid.uuid4())
        # Later keys win, so the variant overrides the matching base value.
        updated_params = {**base_params, **variant}
        output = run_load(test_id, document, **updated_params)
        compare_output(output, expected_output)
def run_rag_tests(document, chunk_size, chunk_overlap, chunk_strategy, loader_strategy, query, output, expected_output,
                  context, metadata):
    """Run RAG tests with various scenarios and parameter variants.

    The same sequence is executed twice, first for ``document`` and then for
    ``document + document``: one load with the base parameters, followed by
    one load per generated parameter variant. Every load's result is passed
    to ``compare_output`` together with ``expected_output``.

    Args:
        document: Document under test; must support ``+`` with itself.
        chunk_size: Base chunk size.
        chunk_overlap: Base chunk overlap.
        chunk_strategy: Chunking strategy identifier.
        loader_strategy: Loader strategy identifier.
        query: Query associated with the run.
        output: Currently ignored; only outputs produced by ``run_load`` are
            compared. Kept so existing callers keep working.
        expected_output: Value each load's output is compared against.
        context: Context associated with the run.
        metadata: Extra metadata associated with the run.

    Returns:
        None.
    """
    # The result is not consumed yet; the call is kept so that any side
    # effects of retrieval still happen.
    retrieve_test_cases()

    requested_params = {
        'chunk_size': chunk_size,
        'chunk_overlap': chunk_overlap,
        'chunk_strategy': chunk_strategy,
        'loader_strategy': loader_strategy,
        'query': query,
        'context': context,
        'metadata': metadata,
    }
    base_params = check_params(**requested_params)
    if base_params is None:
        # check_params may hand back nothing (for example while it is
        # unimplemented); fall back to the caller's values so the ``**``
        # unpacking and key lookups below do not raise TypeError.
        base_params = requested_params

    # Initial load for the single document under its own id.
    single_output = run_load(str(uuid.uuid4()), document, **base_params)
    compare_output(single_output, expected_output)

    # The same variants are reused for both documents.
    param_variants = generate_param_variants(base_params)
    run_tests_with_variants(document, base_params, param_variants, expected_output)

    # Assuming two documents are concatenated and treated as one.
    combined_document = document + document

    # A new id, so this load is not recorded under the single-document run.
    combined_output = run_load(str(uuid.uuid4()), combined_document, **base_params)
    compare_output(combined_output, expected_output)
    run_tests_with_variants(combined_document, base_params, param_variants, expected_output)
def test_0():
    """Score one hard-coded photosynthesis answer with ``OverallScoreMetric``.

    Builds a single ``LLMTestCase`` whose ``output`` differs from
    ``expected_output`` by one word ("foods" vs. "food"), passes it through
    ``assert_test`` and then through ``run_test``, and prints what
    ``run_test`` returns.
    """
    query = "How does photosynthesis work?"
    output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll pigment."
    expected_output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize food with the help of chlorophyll pigment."
    context = "Biology"

    test_case = LLMTestCase(
        query=query,
        output=output,
        expected_output=expected_output,
        context=context,
    )
    metric = OverallScoreMetric()

    # Gate: presumably raises when the metric does not pass, so a failing
    # case stops here -- confirm against the installed deepeval version.
    assert_test(test_case, metrics=[metric])

    # Evaluate again to obtain a result object that can be inspected.
    test_result = run_test(test_case, metrics=[metric])
    print(test_result)