diff --git a/README-zh.md b/README-zh.md index 2d08ff51..2e240375 100644 --- a/README-zh.md +++ b/README-zh.md @@ -204,7 +204,6 @@ async def initialize_rag(): embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) - await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -400,7 +399,6 @@ async def initialize_rag(): ) ) - await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -547,7 +545,6 @@ async def initialize_rag(): ), ) - await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -765,8 +762,6 @@ async def initialize_rag(): graph_storage="Neo4JStorage", #<-----------覆盖KG默认值 ) - # 初始化数据库连接 - await rag.initialize_storages() # 初始化文档处理的管道状态 await initialize_pipeline_status() @@ -1193,9 +1188,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 ) ) - # 初始化存储(如果有现有数据,这将加载现有数据) - await lightrag_instance.initialize_storages() - # 现在使用现有的 LightRAG 实例初始化 RAGAnything rag = RAGAnything( lightrag=lightrag_instance, # 传递现有的 LightRAG 实例 diff --git a/README.md b/README.md index bf319763..9172b621 100644 --- a/README.md +++ b/README.md @@ -181,8 +181,7 @@ For a streaming response implementation example, please see `examples/lightrag_o ### ⚠️ Important: Initialization Requirements -**LightRAG requires explicit initialization before use.** You must call both `await rag.initialize_storages()` and `await initialize_pipeline_status()` after creating a LightRAG instance, otherwise you will encounter errors like: -- `AttributeError: __aenter__` - if storages are not initialized +**LightRAG requires explicit initialization before use.** You must call `await initialize_pipeline_status()` after creating a LightRAG instance, otherwise you will encounter errors like: - `KeyError: 'history_messages'` - if pipeline status is not initialized ### A Simple Program @@ -209,9 +208,8 @@ async def initialize_rag(): embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) - # 
IMPORTANT: Both initialization calls are required! - await rag.initialize_storages() # Initialize storage backends - await initialize_pipeline_status() # Initialize processing pipeline + # IMPORTANT: Initializing the document processing pipeline status is required! + await initialize_pipeline_status() # return rag async def main(): @@ -401,7 +399,6 @@ async def initialize_rag(): ) ) - await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -550,7 +547,6 @@ async def initialize_rag(): ), ) - await rag.initialize_storages() await initialize_pipeline_status() return rag @@ -776,8 +772,6 @@ async def initialize_rag(): graph_storage="Neo4JStorage", #<-----------override KG default ) - # Initialize database connections - await rag.initialize_storages() # Initialize pipeline status for document processing await initialize_pipeline_status() @@ -862,8 +856,6 @@ async def initialize_rag(): graph_storage="MemgraphStorage", #<-----------override KG default ) - # Initialize database connections - await rag.initialize_storages() # Initialize pipeline status for document processing await initialize_pipeline_status() @@ -1239,8 +1231,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ ), ) ) - # Initialize storage (this will load existing data if available) - await lightrag_instance.initialize_storages() # Now initialize RAGAnything with the existing LightRAG instance rag = RAGAnything( lightrag=lightrag_instance, # Pass the existing LightRAG instance @@ -1433,24 +1423,16 @@ Valid modes are: ### Common Initialization Errors -If you encounter these errors when using LightRAG: +If you encounter the following error when using LightRAG: -1. 
**`AttributeError: __aenter__`** - - **Cause**: Storage backends not initialized - - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance +- **`KeyError: 'history_messages'`** +- **Cause**: Pipeline status not initialized +- **Solution**: Call `await initialize_pipeline_status()` after creating the LightRAG instance -2. **`KeyError: 'history_messages'`** - - **Cause**: Pipeline status not initialized - - **Solution**: Call `await initialize_pipeline_status()` after initializing storages - -3. **Both errors in sequence** - - **Cause**: Neither initialization method was called - - **Solution**: Always follow this pattern: - ```python - rag = LightRAG(...) - await rag.initialize_storages() - await initialize_pipeline_status() - ``` +```python +rag = LightRAG(...) +await initialize_pipeline_status() +``` ### Model Switching Issues diff --git a/docs/rerank_integration.md b/docs/rerank_integration.md index 0e6c5169..36058c43 100644 --- a/docs/rerank_integration.md +++ b/docs/rerank_integration.md @@ -174,7 +174,6 @@ async def main(): rerank_model_func=my_rerank_func, ) - await rag.initialize_storages() await initialize_pipeline_status() # Insert documents diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py index 15187d25..0816fb31 100644 --- a/examples/lightrag_openai_compatible_demo.py +++ b/examples/lightrag_openai_compatible_demo.py @@ -4,7 +4,7 @@ import inspect import logging import logging.config from lightrag import LightRAG, QueryParam -from lightrag.llm.openai import openai_complete_if_cache +from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.llm.ollama import ollama_embed from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug from lightrag.kg.shared_storage import initialize_pipeline_status @@ -99,6 +99,26 @@ async def llm_model_func( ) +ollama_embedding_func = EmbeddingFunc( + embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), + 
func=lambda texts: ollama_embed( + texts, + embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"), + host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"), + ), +) + +openai_embedding_func = EmbeddingFunc( + embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), + func=lambda texts: openai_embed( + texts, + model=os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3"), + base_url=os.getenv("EMBEDDING_BINDING_HOST", "https://api.deepseek.com"), + api_key=os.getenv("EMBEDDING_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY"), + ), +) + + async def print_stream(stream): async for chunk in stream: if chunk: @@ -109,18 +129,9 @@ async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")), - max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")), - func=lambda texts: ollama_embed( - texts, - embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"), - host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"), - ), - ), + embedding_func=openai_embedding_func, ) - await rag.initialize_storages() await initialize_pipeline_status() return rag diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 606becb6..0fc098bb 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -149,9 +149,6 @@ def create_app(args): app.state.background_tasks = set() try: - # Initialize database connections - await rag.initialize_storages() - await initialize_pipeline_status() pipeline_status = await get_namespace_data("pipeline_status") @@ -401,7 +398,6 @@ def create_app(args): enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, rerank_model_func=rerank_model_func, - auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, max_graph_nodes=args.max_graph_nodes, addon_params={"language": args.summary_language}, @@ 
-431,7 +427,6 @@ def create_app(args): enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, rerank_model_func=rerank_model_func, - auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, max_graph_nodes=args.max_graph_nodes, addon_params={"language": args.summary_language}, diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 73386b0b..b46cae1b 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -334,6 +334,7 @@ class LightRAG: # Storages Management # --- + # TODO: Deprecated (LightRAG will always manage storages states) auto_manage_storages_states: bool = field(default=True) """If True, lightrag will automatically calls initialize_storages and finalize_storages at the appropriate times.""" @@ -531,12 +532,14 @@ class LightRAG: self._storages_status = StoragesStatus.CREATED - if self.auto_manage_storages_states: - self._run_async_safely(self.initialize_storages, "Storage Initialization") + # Initialize storages + self._run_async_safely(self.initialize_storages, "Storage Initialization") + + # Check and perform data migration if needed + self._run_async_safely(self._check_and_migrate_data, "Data Migration Check") def __del__(self): - if self.auto_manage_storages_states: - self._run_async_safely(self.finalize_storages, "Storage Finalization") + self._run_async_safely(self.finalize_storages, "Storage Finalization") def _run_async_safely(self, async_func, action_name=""): """Safely execute an async function, avoiding event loop conflicts.""" diff --git a/reproduce/Step_1.py b/reproduce/Step_1.py index c94015ad..d74b358a 100644 --- a/reproduce/Step_1.py +++ b/reproduce/Step_1.py @@ -35,7 +35,6 @@ if not os.path.exists(WORKING_DIR): async def initialize_rag(): rag = LightRAG(working_dir=WORKING_DIR) - await rag.initialize_storages() await initialize_pipeline_status() return rag diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py 
index 8093a9ee..4040810d 100644 --- a/reproduce/Step_1_openai_compatible.py +++ b/reproduce/Step_1_openai_compatible.py @@ -70,7 +70,6 @@ async def initialize_rag(): embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func), ) - await rag.initialize_storages() await initialize_pipeline_status() return rag