fix to test

This commit is contained in:
vasilije 2025-07-16 16:19:24 +02:00
parent 8e80d800e4
commit 186555a139
2 changed files with 42 additions and 7 deletions

View file

@ -297,7 +297,7 @@ jobs:
- name: Run Knowledge Graph Quality Test
env:
ENV: 'dev'
LLM_MODEL: 'gpt-4o'
# Model selection is handled by the test with fallback priority
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}

View file

@ -18,14 +18,35 @@ async def test_knowledge_graph_quality_with_gpt4o():
quality of knowledge graph creation after data model changes.
"""
# Configure GPT-4o for best quality
os.environ["LLM_MODEL"] = "gpt-4o"
cognee.config.set_llm_model("gpt-4o")
# Configure model with fallback for better availability
preferred_models = ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"]
selected_model = None
# Ensure we have API key
if not os.environ.get("LLM_API_KEY"):
raise ValueError("LLM_API_KEY must be set for this test")
# Try to find an available model by testing actual availability
from cognee.infrastructure.llm.utils import test_llm_connection
for model in preferred_models:
try:
os.environ["LLM_MODEL"] = model
cognee.config.set_llm_model(model)
# Test the model availability
await test_llm_connection()
selected_model = model
print(f"Successfully using model: {model}")
break
except Exception as e:
print(f"Model {model} not available: {e}")
continue
if not selected_model:
raise ValueError("No suitable model available from: " + ", ".join(preferred_models))
# Set up test directories
data_directory_path = str(
pathlib.Path(
@ -232,10 +253,24 @@ async def test_knowledge_graph_quality_with_gpt4o():
print("QUALITY ASSESSMENT:")
print("-" * 40)
print(f"Model used: {selected_model}")
print(f"Minimum entity coverage threshold: {min_entity_coverage:.1%}")
print(f"Minimum concept coverage threshold: {min_concept_coverage:.1%}")
print()
# We expect high coverage with GPT-4o
min_entity_coverage = 0.70 # At least 70% of entities should be found
min_concept_coverage = 0.60 # At least 60% of concepts should be found
# Adjust quality thresholds based on model capability
if selected_model == "gpt-4o":
min_entity_coverage = 0.70 # At least 70% of entities should be found
min_concept_coverage = 0.60 # At least 60% of concepts should be found
elif selected_model == "gpt-4o-mini":
min_entity_coverage = 0.65 # Slightly lower for mini model
min_concept_coverage = 0.55 # Slightly lower for mini model
elif selected_model == "gpt-4-turbo":
min_entity_coverage = 0.68 # Good performance expected
min_concept_coverage = 0.58 # Good performance expected
else: # gpt-3.5-turbo or other models
min_entity_coverage = 0.60 # Lower threshold for older models
min_concept_coverage = 0.50 # Lower threshold for older models
if entity_coverage >= min_entity_coverage:
print(