added check
This commit is contained in:
parent
a06b3fc7e4
commit
b7cf8f2f3c
1 changed file with 286 additions and 1 deletion
|
|
@ -8,6 +8,280 @@ from cognee.modules.search.types import SearchType
|
||||||
from cognee.modules.users.methods import get_default_user, create_user
|
from cognee.modules.users.methods import get_default_user, create_user
|
||||||
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
||||||
|
|
||||||
|
|
||||||
|
async def test_knowledge_graph_quality_with_gpt4o():
    """
    Verify that the main entities and concepts from a known document are found
    in the knowledge graph when using the GPT-4o model for entity extraction.

    The document ``test_data/Natural_language_processing.txt`` is added and
    cognified, then several search types are queried and their combined results
    are scanned for the expected entity names and concept key words.

    This test addresses the issue where HotPotQA questions may not reflect
    diminishing quality of knowledge graph creation after data model changes.

    Returns:
        dict: coverage metrics plus found/missing entity and concept lists.

    Raises:
        ValueError: if ``LLM_API_KEY`` is not set in the environment.
        AssertionError: if entity or concept coverage is below the minimums.
    """
    # Configure GPT-4o for best quality.
    os.environ["LLM_MODEL"] = "gpt-4o"
    cognee.config.set_llm_model("gpt-4o")

    # Fail fast before doing any expensive setup work.
    if not os.environ.get("LLM_API_KEY"):
        raise ValueError("LLM_API_KEY must be set for this test")

    # Set up isolated test directories so this test does not clobber other runs.
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_kg_quality")
        ).resolve()
    )
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_kg_quality")
        ).resolve()
    )

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean up before starting.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Get test document path.
    test_document_path = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
    )

    # Expected entities and concepts from the NLP document.
    expected_entities = [
        "Natural language processing",
        "NLP",
        "computer science",
        "information retrieval",
        "machine learning",
        "neural network",
        "speech recognition",
        "natural-language understanding",
        "natural-language generation",
        "theoretical linguistics",
        "text corpora",
        "speech corpora",
        "statistical approaches",
        "probabilistic approaches",
        "rule-based approaches",
        "documents",
        "language",
        "computers",
    ]

    expected_concepts = [
        "NLP is a subfield of computer science",
        "NLP is interdisciplinary",
        "NLP involves processing natural language datasets",
        "NLP uses machine learning approaches",
        "NLP borrows ideas from theoretical linguistics",
        "NLP can extract information from documents",
        "NLP can categorize and organize documents",
        "NLP involves speech recognition",
        "NLP involves natural-language understanding",
        "NLP involves natural-language generation",
        "computers can understand document contents",
        "neural networks are used in NLP",
        "statistical approaches are used in NLP",
    ]

    print("=" * 80)
    print("KNOWLEDGE GRAPH QUALITY TEST WITH GPT-4o")
    print("=" * 80)
    print(f"Using model: {os.environ.get('LLM_MODEL', 'gpt-4o')}")
    print(f"Test document: {test_document_path}")
    print()

    # Add and process the document.
    print("Adding document to cognee...")
    await cognee.add([test_document_path], dataset_name="NLP_TEST")

    user = await get_default_user()

    print("Processing document with cognify...")
    await cognee.cognify(["NLP_TEST"], user=user)
    print("Document processing completed.")
    print()

    # Test different search types to find entities and concepts.
    search_types_to_test = [
        (SearchType.INSIGHTS, "Get entity relationships and connections"),
        (SearchType.GRAPH_COMPLETION, "Natural language completion with graph context"),
        (SearchType.CHUNKS, "Find relevant document chunks"),
        (SearchType.SUMMARIES, "Get content summaries"),
    ]

    all_found_results = {}

    for search_type, description in search_types_to_test:
        print(f"Testing {search_type.value} search - {description}")
        print("-" * 60)

        # Search for entities.
        entity_results = await cognee.search(
            query_type=search_type,
            query_text="What are the main entities, concepts, and terms mentioned in this document?",
            user=user,
            top_k=20,
        )

        # Search for relationships.
        relationship_results = await cognee.search(
            query_type=search_type,
            query_text="What are the key relationships and connections between concepts in this document?",
            user=user,
            top_k=20,
        )

        all_found_results[search_type.value] = {
            "entities": entity_results,
            "relationships": relationship_results,
        }

        print(f"Entity search results ({len(entity_results)} items):")
        for i, result in enumerate(entity_results[:3]):  # Show first 3 results
            print(f"  {i + 1}. {result}")

        print(f"Relationship search results ({len(relationship_results)} items):")
        for i, result in enumerate(relationship_results[:3]):  # Show first 3 results
            print(f"  {i + 1}. {result}")
        print()

    # Analyze results and check for expected entities and concepts.
    print("ANALYSIS: Expected vs Found")
    print("=" * 80)

    # Combine all results into a single lowercase haystack. Using join instead
    # of repeated `+=` avoids quadratic string concatenation, and we only need
    # the result lists (the per-type keys are unused here).
    all_results_text = "".join(
        f" {' '.join(str(r) for r in result_list)}"
        for results in all_found_results.values()
        for result_list in results.values()
    ).lower()

    print("ENTITY ANALYSIS:")
    print("-" * 40)
    found_entities, missing_entities = _analyze_entities(expected_entities, all_results_text)

    print()
    print("CONCEPT ANALYSIS:")
    print("-" * 40)
    found_concepts, missing_concepts = _analyze_concepts(expected_concepts, all_results_text)

    print()
    print("SUMMARY:")
    print("=" * 40)
    print(f"Expected entities: {len(expected_entities)}")
    print(f"Found entities: {len(found_entities)}")
    print(f"Missing entities: {len(missing_entities)}")
    print(f"Entity coverage: {len(found_entities) / len(expected_entities):.1%}")
    print()
    print(f"Expected concepts: {len(expected_concepts)}")
    print(f"Found concepts: {len(found_concepts)}")
    print(f"Missing concepts: {len(missing_concepts)}")
    print(f"Concept coverage: {len(found_concepts) / len(expected_concepts):.1%}")
    print()

    # Test assertions.
    entity_coverage = len(found_entities) / len(expected_entities)
    concept_coverage = len(found_concepts) / len(expected_concepts)

    print("QUALITY ASSESSMENT:")
    print("-" * 40)

    # We expect high coverage with GPT-4o.
    min_entity_coverage = 0.70  # At least 70% of entities should be found
    min_concept_coverage = 0.60  # At least 60% of concepts should be found

    if entity_coverage >= min_entity_coverage:
        print(
            f"✓ PASS: Entity coverage ({entity_coverage:.1%}) meets minimum requirement ({min_entity_coverage:.1%})"
        )
    else:
        print(
            f"✗ FAIL: Entity coverage ({entity_coverage:.1%}) below minimum requirement ({min_entity_coverage:.1%})"
        )

    if concept_coverage >= min_concept_coverage:
        print(
            f"✓ PASS: Concept coverage ({concept_coverage:.1%}) meets minimum requirement ({min_concept_coverage:.1%})"
        )
    else:
        print(
            f"✗ FAIL: Concept coverage ({concept_coverage:.1%}) below minimum requirement ({min_concept_coverage:.1%})"
        )

    overall_quality = (entity_coverage + concept_coverage) / 2
    print(f"Overall quality score: {overall_quality:.1%}")

    # Assert that we have acceptable quality.
    assert entity_coverage >= min_entity_coverage, (
        f"Entity coverage {entity_coverage:.1%} below minimum {min_entity_coverage:.1%}"
    )
    assert concept_coverage >= min_concept_coverage, (
        f"Concept coverage {concept_coverage:.1%} below minimum {min_concept_coverage:.1%}"
    )

    print()
    print("=" * 80)
    print("KNOWLEDGE GRAPH QUALITY TEST COMPLETED SUCCESSFULLY")
    print("=" * 80)

    return {
        "entity_coverage": entity_coverage,
        "concept_coverage": concept_coverage,
        "overall_quality": overall_quality,
        "found_entities": found_entities,
        "missing_entities": missing_entities,
        "found_concepts": found_concepts,
        "missing_concepts": missing_concepts,
    }


# Words ignored when extracting key words from an expected concept phrase.
_CONCEPT_STOP_WORDS = {"the", "and", "are", "can", "involves", "uses", "from"}


def _analyze_entities(expected_entities, results_text):
    """Split *expected_entities* into (found, missing) lists by substring match.

    An entity counts as found when the entity itself, or a hyphen/space-swapped
    variant of it, occurs in the lowercased *results_text*. Prints one line per
    entity as a side effect.
    """
    found, missing = [], []
    for entity in expected_entities:
        entity_lower = entity.lower()
        variants = (
            entity_lower,
            entity_lower.replace("-", " "),
            entity_lower.replace(" ", "-"),
        )
        if any(variant in results_text for variant in variants):
            found.append(entity)
            print(f"✓ FOUND: {entity}")
        else:
            missing.append(entity)
            print(f"✗ MISSING: {entity}")
    return found, missing


def _analyze_concepts(expected_concepts, results_text):
    """Split *expected_concepts* into (found, missing) lists by key-word coverage.

    A concept counts as found when at least 60% of its key words (words longer
    than two characters that are not stop words) occur in the lowercased
    *results_text*. Prints one line per concept as a side effect.
    """
    found, missing = [], []
    for concept in expected_concepts:
        key_words = [
            word
            for word in concept.lower().split()
            if len(word) > 2 and word not in _CONCEPT_STOP_WORDS
        ]
        if not key_words:
            missing.append(concept)
            print(f"✗ MISSING: {concept} (no key words)")
            continue

        coverage = sum(1 for word in key_words if word in results_text) / len(key_words)
        if coverage >= 0.6:  # At least 60% of key words found
            found.append(concept)
            print(f"✓ FOUND: {concept} (coverage: {coverage:.1%})")
        else:
            missing.append(concept)
            print(f"✗ MISSING: {concept} (coverage: {coverage:.1%})")
    return found, missing
||||||
# Module-level logger shared by the tests in this file. The diff residue showed
# this statement twice; it must appear exactly once at module scope.
logger = get_logger()
@ -197,7 +471,18 @@ async def main():
|
||||||
await cognee.delete([explanation_file_path], dataset_id=test_user_dataset_id, user=default_user)
|
await cognee.delete([explanation_file_path], dataset_id=test_user_dataset_id, user=default_user)
|
||||||
|
|
||||||
|
|
||||||
|
async def main_quality_test():
    """Run the knowledge graph quality test and return its metrics dict.

    Returning the result (instead of discarding it) lets programmatic callers
    inspect the coverage numbers; existing callers that ignore the return
    value are unaffected.
    """
    return await test_knowledge_graph_quality_with_gpt4o()
||||||
if __name__ == "__main__":
    import asyncio
    import sys

    # Scenario selection: `python <file> quality` runs the knowledge-graph
    # quality test; any other invocation runs the permissions test.
    run_quality = len(sys.argv) > 1 and sys.argv[1] == "quality"

    if run_quality:
        print("Running Knowledge Graph Quality Test...")
        asyncio.run(main_quality_test())
    else:
        print("Running Permissions Test...")
        asyncio.run(main())
|
||||||
Loading…
Add table
Reference in a new issue