From 06efab4af270905d83b5e3145626ed3a47808e25 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 3 Aug 2025 12:12:13 +0800 Subject: [PATCH] Revert "Remove auto_manage_storages_states option" This reverts commit bfe6657b316f7e50bc9c5f0cc71d9fbb2b605ddd. --- README-zh.md | 8 +++++ README.md | 40 +++++++++++++++------ docs/rerank_integration.md | 1 + examples/lightrag_openai_compatible_demo.py | 33 ++++++----------- lightrag/api/lightrag_server.py | 5 +++ lightrag/lightrag.py | 11 +++--- reproduce/Step_1.py | 1 + reproduce/Step_1_openai_compatible.py | 1 + 8 files changed, 60 insertions(+), 40 deletions(-) diff --git a/README-zh.md b/README-zh.md index 2e240375..2d08ff51 100644 --- a/README-zh.md +++ b/README-zh.md @@ -204,6 +204,7 @@ async def initialize_rag(): embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) + await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -399,6 +400,7 @@ async def initialize_rag(): ) ) + await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -545,6 +547,7 @@ async def initialize_rag(): ), ) + await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -762,6 +765,8 @@ async def initialize_rag(): graph_storage="Neo4JStorage", #<-----------覆盖KG默认值 ) + # 初始化数据库连接 + await rag.initialize_storages() # 初始化文档处理的管道状态 await initialize_pipeline_status() @@ -1188,6 +1193,9 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 ) ) + # 初始化存储(如果有现有数据,这将加载现有数据) + await lightrag_instance.initialize_storages() + # 现在使用现有的 LightRAG 实例初始化 RAGAnything rag = RAGAnything( lightrag=lightrag_instance, # 传递现有的 LightRAG 实例 diff --git a/README.md b/README.md index 9172b621..bf319763 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,8 @@ For a streaming response implementation example, please see `examples/lightrag_o ### ⚠️ Important: Initialization Requirements -**LightRAG requires explicit initialization before use.** You must call `await initialize_pipeline_status()` after creating a LightRAG instance, otherwise you will encounter errors like: +**LightRAG requires explicit initialization before use.** You must call both `await rag.initialize_storages()` and `await initialize_pipeline_status()` after creating a LightRAG instance, otherwise you will encounter errors like: +- `AttributeError: __aenter__` - if storages are not initialized - `KeyError: 'history_messages'` - if pipeline status is not initialized ### A Simple Program @@ -208,8 +209,9 @@ async def initialize_rag(): embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) - # IMPORTANT: Initialize document processing pipeline status is required! - await initialize_pipeline_status() # + # IMPORTANT: Both initialization calls are required! + await rag.initialize_storages() # Initialize storage backends + await initialize_pipeline_status() # Initialize processing pipeline return rag async def main(): @@ -399,6 +401,7 @@ async def initialize_rag(): ) ) + await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -547,6 +550,7 @@ async def initialize_rag(): ), ) + await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -772,6 +776,8 @@ async def initialize_rag(): graph_storage="Neo4JStorage", #<-----------override KG default ) + # Initialize database connections + await rag.initialize_storages() # Initialize pipeline status for document processing await initialize_pipeline_status() @@ -856,6 +862,8 @@ async def initialize_rag(): graph_storage="MemgraphStorage", #<-----------override KG default ) + # Initialize database connections + await rag.initialize_storages() # Initialize pipeline status for document processing await initialize_pipeline_status() @@ -1231,6 +1239,8 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ ), ) ) + # Initialize storage (this will load existing data if available) + await lightrag_instance.initialize_storages() # Now initialize RAGAnything with the existing LightRAG instance rag = RAGAnything( lightrag=lightrag_instance, # Pass the existing LightRAG instance @@ -1423,16 +1433,24 @@ Valid modes are: ### Common Initialization Errors -If you encounter the following error when using LightRAG: +If you encounter these errors when using LightRAG: -- **`KeyError: 'history_messages'`** -- **Cause**: Pipeline status not initialized -- **Solution**: Call `await initialize_pipeline_status()` after initializing storages +1. **`AttributeError: __aenter__`** + - **Cause**: Storage backends not initialized + - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance -```python -rag = LightRAG(...) -await initialize_pipeline_status() -``` +2. **`KeyError: 'history_messages'`** + - **Cause**: Pipeline status not initialized + - **Solution**: Call `await initialize_pipeline_status()` after initializing storages + +3. **Both errors in sequence** + - **Cause**: Neither initialization method was called + - **Solution**: Always follow this pattern: + ```python + rag = LightRAG(...) + await rag.initialize_storages() + await initialize_pipeline_status() + ``` ### Model Switching Issues diff --git a/docs/rerank_integration.md b/docs/rerank_integration.md index 36058c43..0e6c5169 100644 --- a/docs/rerank_integration.md +++ b/docs/rerank_integration.md @@ -174,6 +174,7 @@ async def main(): rerank_model_func=my_rerank_func, ) + await rag.initialize_storages() await initialize_pipeline_status() # Insert documents diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py index 0816fb31..15187d25 100644 --- a/examples/lightrag_openai_compatible_demo.py +++ b/examples/lightrag_openai_compatible_demo.py @@ -4,7 +4,7 @@ import inspect import logging import logging.config from lightrag import LightRAG, QueryParam -from lightrag.llm.openai import openai_complete_if_cache, openai_embed +from lightrag.llm.openai import openai_complete_if_cache from lightrag.llm.ollama import ollama_embed from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug from lightrag.kg.shared_storage import initialize_pipeline_status @@ -99,26 +99,6 @@ async def llm_model_func( ) -ollama_embedding_func = EmbeddingFunc( - embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), - func=lambda texts: ollama_embed( - texts, - embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"), - host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"), - ), -) - -openai_embedding_func = EmbeddingFunc( - embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), - func=lambda texts: openai_embed( - texts, - model=os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3"), - base_url=os.getenv("EMBEDDING_BINDING_HOST", "https://api.deepseek.com"), - api_key=os.getenv("EMBEDDING_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY"), - ), -) - - async def print_stream(stream): async for chunk in stream: if chunk: @@ -129,9 +109,18 @@ async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, - embedding_func=openai_embedding_func, + embedding_func=EmbeddingFunc( + embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), + max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")), + func=lambda texts: ollama_embed( + texts, + embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"), + host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"), + ), + ), ) + await rag.initialize_storages() await initialize_pipeline_status() return rag diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 0fc098bb..606becb6 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -149,6 +149,9 @@ def create_app(args): app.state.background_tasks = set() try: + # Initialize database connections + await rag.initialize_storages() + await initialize_pipeline_status() pipeline_status = await get_namespace_data("pipeline_status") @@ -398,6 +401,7 @@ def create_app(args): enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, rerank_model_func=rerank_model_func, + auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, max_graph_nodes=args.max_graph_nodes, addon_params={"language": args.summary_language}, @@ -427,6 +431,7 @@ def create_app(args): enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, rerank_model_func=rerank_model_func, + auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, max_graph_nodes=args.max_graph_nodes, addon_params={"language": args.summary_language}, diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index b46cae1b..73386b0b 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -334,7 +334,6 @@ class LightRAG: # Storages Management # --- - # TODO: Deprecated (LightRAG will always manage storages states) auto_manage_storages_states: bool = field(default=True) """If True, lightrag will automatically calls initialize_storages and finalize_storages at the appropriate times.""" @@ -532,14 +531,12 @@ class LightRAG: self._storages_status = StoragesStatus.CREATED - # Initialize storages - self._run_async_safely(self.initialize_storages, "Storage Initialization") - - # Check and perform data migration if needed - self._run_async_safely(self._check_and_migrate_data, "Data Migration Check") + if self.auto_manage_storages_states: + self._run_async_safely(self.initialize_storages, "Storage Initialization") def __del__(self): - self._run_async_safely(self.finalize_storages, "Storage Finalization") + if self.auto_manage_storages_states: + self._run_async_safely(self.finalize_storages, "Storage Finalization") def _run_async_safely(self, async_func, action_name=""): """Safely execute an async function, avoiding event loop conflicts.""" diff --git a/reproduce/Step_1.py b/reproduce/Step_1.py index d74b358a..c94015ad 100644 --- a/reproduce/Step_1.py +++ b/reproduce/Step_1.py @@ -35,6 +35,7 @@ if not os.path.exists(WORKING_DIR): async def initialize_rag(): rag = LightRAG(working_dir=WORKING_DIR) + await rag.initialize_storages() await initialize_pipeline_status() return rag diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py index 4040810d..8093a9ee 100644 --- a/reproduce/Step_1_openai_compatible.py +++ b/reproduce/Step_1_openai_compatible.py @@ -70,6 +70,7 @@ async def initialize_rag(): embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func), ) + await rag.initialize_storages() await initialize_pipeline_status() return rag