From e3ae87b0cbccf04c79ce3bdbd98c8d9fb0ae6e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Gil=20L=C3=B3pez?= Date: Tue, 19 Aug 2025 06:44:33 +0000 Subject: [PATCH 1/2] feat: Add diagnostic tool to check initialization status - Add check_initialization.py tool to help developers verify proper setup - Tool checks all storage components and pipeline status - Provides clear feedback on what's missing and how to fix it - Includes demo mode to show before/after initialization - Helps prevent common initialization errors proactively This tool makes it easier for developers to debug initialization issues --- lightrag/tools/check_initialization.py | 176 +++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 lightrag/tools/check_initialization.py diff --git a/lightrag/tools/check_initialization.py b/lightrag/tools/check_initialization.py new file mode 100644 index 00000000..b10e31ba --- /dev/null +++ b/lightrag/tools/check_initialization.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +""" +Diagnostic tool to check LightRAG initialization status. + +This tool helps developers verify that their LightRAG instance is properly +initialized before use, preventing common initialization errors. + +Usage: + python -m lightrag.tools.check_initialization +""" + +import asyncio +import sys +from typing import Optional +from pathlib import Path + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from lightrag import LightRAG +from lightrag.base import StoragesStatus + + +async def check_lightrag_setup(rag_instance: LightRAG, verbose: bool = False) -> bool: + """ + Check if a LightRAG instance is properly initialized. + + Args: + rag_instance: The LightRAG instance to check + verbose: If True, print detailed diagnostic information + + Returns: + True if properly initialized, False otherwise + """ + issues = [] + warnings = [] + + print("šŸ” Checking LightRAG initialization status...\n") + + # Check storage initialization status + if not hasattr(rag_instance, '_storages_status'): + issues.append("LightRAG instance missing _storages_status attribute") + elif rag_instance._storages_status != StoragesStatus.INITIALIZED: + issues.append(f"Storages not initialized (status: {rag_instance._storages_status.name})") + else: + print("āœ… Storage status: INITIALIZED") + + # Check individual storage components + storage_components = [ + ('full_docs', 'Document storage'), + ('text_chunks', 'Text chunks storage'), + ('entities_vdb', 'Entity vector database'), + ('relationships_vdb', 'Relationship vector database'), + ('chunks_vdb', 'Chunks vector database'), + ('doc_status', 'Document status tracker'), + ('llm_response_cache', 'LLM response cache'), + ('full_entities', 'Entity storage'), + ('full_relations', 'Relation storage'), + ('chunk_entity_relation_graph', 'Graph storage') + ] + + if verbose: + print("\nšŸ“¦ Storage Components:") + + for component, description in storage_components: + if not hasattr(rag_instance, component): + issues.append(f"Missing storage component: {component} ({description})") + else: + storage = getattr(rag_instance, component) + if storage is None: + warnings.append(f"Storage {component} is None (might be optional)") + elif hasattr(storage, '_storage_lock'): + if storage._storage_lock is None: + issues.append(f"Storage {component} not initialized (lock is None)") + elif verbose: + print(f" āœ… {description}: Ready") + elif verbose: + print(f" āœ… {description}: Ready") + + # Check pipeline status + try: + from lightrag.kg.shared_storage import get_namespace_data + get_namespace_data("pipeline_status") + print("āœ… Pipeline status: INITIALIZED") + except KeyError: + issues.append("Pipeline status not initialized - call initialize_pipeline_status()") + except Exception as e: + issues.append(f"Error checking pipeline status: {str(e)}") + + # Print results + print("\n" + "=" * 50) + + if issues: + print("āŒ Issues found:\n") + for issue in issues: + print(f" • {issue}") + + print("\nšŸ“ To fix, run this initialization sequence:\n") + print(" await rag.initialize_storages()") + print(" from lightrag.kg.shared_storage import initialize_pipeline_status") + print(" await initialize_pipeline_status()") + print("\nšŸ“š Documentation: https://github.com/HKUDS/LightRAG#important-initialization-requirements") + + if warnings and verbose: + print("\nāš ļø Warnings (might be normal):") + for warning in warnings: + print(f" • {warning}") + + return False + else: + print("āœ… LightRAG is properly initialized and ready to use!") + + if warnings and verbose: + print("\nāš ļø Warnings (might be normal):") + for warning in warnings: + print(f" • {warning}") + + return True + + +async def demo(): + """Demonstrate the diagnostic tool with a test instance.""" + from lightrag.llm.openai import openai_embed, gpt_4o_mini_complete + from lightrag.kg.shared_storage import initialize_pipeline_status + + print("=" * 50) + print("LightRAG Initialization Diagnostic Tool") + print("=" * 50) + + # Create test instance + rag = LightRAG( + working_dir="./test_diagnostic", + embedding_func=openai_embed, + llm_model_func=gpt_4o_mini_complete, + ) + + print("\nšŸ”“ BEFORE initialization:\n") + await check_lightrag_setup(rag, verbose=True) + + print("\n" + "=" * 50) + print("\nšŸ”„ Initializing...\n") + await rag.initialize_storages() + await initialize_pipeline_status() + + print("\n🟢 AFTER initialization:\n") + await check_lightrag_setup(rag, verbose=True) + + # Cleanup + import shutil + shutil.rmtree("./test_diagnostic", ignore_errors=True) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Check LightRAG initialization status" + ) + parser.add_argument( + "--demo", + action="store_true", + help="Run a demonstration with a test instance" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Show detailed diagnostic information" + ) + + args = parser.parse_args() + + if args.demo: + asyncio.run(demo()) + else: + print("Run with --demo to see the diagnostic tool in action") + print("Or import this module and use check_lightrag_setup() with your instance") \ No newline at end of file From 8a293a2c076d374a0c3ae6ef654196aa2959c255 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 23 Aug 2025 02:39:12 +0800 Subject: [PATCH 2/2] Fix linting --- lightrag/tools/check_initialization.py | 106 +++++++++++++------------ 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/lightrag/tools/check_initialization.py b/lightrag/tools/check_initialization.py index b10e31ba..6bcb17e3 100644 --- a/lightrag/tools/check_initialization.py +++ b/lightrag/tools/check_initialization.py @@ -2,7 +2,7 @@ """ Diagnostic tool to check LightRAG initialization status. -This tool helps developers verify that their LightRAG instance is properly +This tool helps developers verify that their LightRAG instance is properly initialized before use, preventing common initialization errors. Usage: @@ -11,7 +11,6 @@ Usage: import asyncio import sys -from typing import Optional from pathlib import Path # Add parent directory to path for imports @@ -24,44 +23,46 @@ from lightrag.base import StoragesStatus async def check_lightrag_setup(rag_instance: LightRAG, verbose: bool = False) -> bool: """ Check if a LightRAG instance is properly initialized. - + Args: rag_instance: The LightRAG instance to check verbose: If True, print detailed diagnostic information - + Returns: True if properly initialized, False otherwise """ issues = [] warnings = [] - + print("šŸ” Checking LightRAG initialization status...\n") - + # Check storage initialization status - if not hasattr(rag_instance, '_storages_status'): + if not hasattr(rag_instance, "_storages_status"): issues.append("LightRAG instance missing _storages_status attribute") elif rag_instance._storages_status != StoragesStatus.INITIALIZED: - issues.append(f"Storages not initialized (status: {rag_instance._storages_status.name})") + issues.append( + f"Storages not initialized (status: {rag_instance._storages_status.name})" + ) else: print("āœ… Storage status: INITIALIZED") - + # Check individual storage components storage_components = [ - ('full_docs', 'Document storage'), - ('text_chunks', 'Text chunks storage'), - ('entities_vdb', 'Entity vector database'), - ('relationships_vdb', 'Relationship vector database'), - ('chunks_vdb', 'Chunks vector database'), - ('doc_status', 'Document status tracker'), - ('llm_response_cache', 'LLM response cache'), - ('full_entities', 'Entity storage'), - ('full_relations', 'Relation storage'), - ('chunk_entity_relation_graph', 'Graph storage') + ("full_docs", "Document storage"), + ("text_chunks", "Text chunks storage"), + ("entities_vdb", "Entity vector database"), + ("relationships_vdb", "Relationship vector database"), + ("chunks_vdb", "Chunks vector database"), + ("doc_status", "Document status tracker"), + ("llm_response_cache", "LLM response cache"), + ("full_entities", "Entity storage"), + ("full_relations", "Relation storage"), + ("chunk_entity_relation_graph", "Graph storage"), ] - + if verbose: print("\nšŸ“¦ Storage Components:") - + for component, description in storage_components: if not hasattr(rag_instance, component): issues.append(f"Missing storage component: {component} ({description})") @@ -69,52 +70,57 @@ async def check_lightrag_setup(rag_instance: LightRAG, verbose: bool = False) -> storage = getattr(rag_instance, component) if storage is None: warnings.append(f"Storage {component} is None (might be optional)") - elif hasattr(storage, '_storage_lock'): + elif hasattr(storage, "_storage_lock"): if storage._storage_lock is None: issues.append(f"Storage {component} not initialized (lock is None)") elif verbose: print(f" āœ… {description}: Ready") elif verbose: print(f" āœ… {description}: Ready") - + # Check pipeline status try: from lightrag.kg.shared_storage import get_namespace_data + get_namespace_data("pipeline_status") print("āœ… Pipeline status: INITIALIZED") except KeyError: - issues.append("Pipeline status not initialized - call initialize_pipeline_status()") + issues.append( + "Pipeline status not initialized - call initialize_pipeline_status()" + ) except Exception as e: issues.append(f"Error checking pipeline status: {str(e)}") - + # Print results print("\n" + "=" * 50) - + if issues: print("āŒ Issues found:\n") for issue in issues: print(f" • {issue}") - + print("\nšŸ“ To fix, run this initialization sequence:\n") print(" await rag.initialize_storages()") print(" from lightrag.kg.shared_storage import initialize_pipeline_status") print(" await initialize_pipeline_status()") - print("\nšŸ“š Documentation: https://github.com/HKUDS/LightRAG#important-initialization-requirements") - + print( + "\nšŸ“š Documentation: https://github.com/HKUDS/LightRAG#important-initialization-requirements" + ) + if warnings and verbose: print("\nāš ļø Warnings (might be normal):") for warning in warnings: print(f" • {warning}") - + return False else: print("āœ… LightRAG is properly initialized and ready to use!") - + if warnings and verbose: print("\nāš ļø Warnings (might be normal):") for warning in warnings: print(f" • {warning}") - + return True @@ -122,55 +128,53 @@ async def demo(): """Demonstrate the diagnostic tool with a test instance.""" from lightrag.llm.openai import openai_embed, gpt_4o_mini_complete from lightrag.kg.shared_storage import initialize_pipeline_status - + print("=" * 50) print("LightRAG Initialization Diagnostic Tool") print("=" * 50) - + # Create test instance rag = LightRAG( working_dir="./test_diagnostic", embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) - + print("\nšŸ”“ BEFORE initialization:\n") await check_lightrag_setup(rag, verbose=True) - + print("\n" + "=" * 50) print("\nšŸ”„ Initializing...\n") await rag.initialize_storages() await initialize_pipeline_status() - + print("\n🟢 AFTER initialization:\n") await check_lightrag_setup(rag, verbose=True) - + # Cleanup import shutil + shutil.rmtree("./test_diagnostic", ignore_errors=True) if __name__ == "__main__": import argparse - - parser = argparse.ArgumentParser( - description="Check LightRAG initialization status" + + parser = argparse.ArgumentParser(description="Check LightRAG initialization status") + parser.add_argument( + "--demo", action="store_true", help="Run a demonstration with a test instance" ) parser.add_argument( - "--demo", + "--verbose", + "-v", action="store_true", - help="Run a demonstration with a test instance" + help="Show detailed diagnostic information", ) - parser.add_argument( - "--verbose", "-v", - action="store_true", - help="Show detailed diagnostic information" - ) - + args = parser.parse_args() - + if args.demo: asyncio.run(demo()) else: print("Run with --demo to see the diagnostic tool in action") - print("Or import this module and use check_lightrag_setup() with your instance") \ No newline at end of file + print("Or import this module and use check_lightrag_setup() with your instance")