diff --git a/README-zh.md b/README-zh.md
index 685c9468..25273399 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
    pip install raganything
    ```
 2. 处理多模态文档:
-   ```python
-   import asyncio
-   from raganything import RAGAnything
-   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-
-   async def main():
-       # 使用LightRAG集成初始化RAGAnything
-       rag = RAGAnything(
-           working_dir="./rag_storage",
-           llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
-               "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
-           ),
-           embedding_func=lambda texts: openai_embed(
-               texts, model="text-embedding-3-large", api_key="your-api-key"
-           ),
-           embedding_dim=3072,
-       )
-
-       # 处理多模态文档
-       await rag.process_document_complete(
-           file_path="path/to/your/document.pdf",
-           output_dir="./output"
-       )
-
-       # 查询多模态内容
-       result = await rag.query_with_multimodal(
-           "图表中显示的主要发现是什么?",
-           mode="hybrid"
-       )
-       print(result)
-
-   if __name__ == "__main__":
-       asyncio.run(main())
-   ```
+<details>
+<summary>RAGAnything 使用实例</summary>
+
+```python
+import asyncio
+from raganything import RAGAnything
+from lightrag import LightRAG
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+import os
+
+async def load_existing_lightrag():
+    # 首先，创建或加载现有的 LightRAG 实例
+    lightrag_working_dir = "./existing_lightrag_storage"
+
+    # 检查是否存在之前的 LightRAG 实例
+    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+        print("✅ Found existing LightRAG instance, loading...")
+    else:
+        print("❌ No existing LightRAG instance found, will create new one")
+
+    # 使用您的配置创建/加载 LightRAG 实例
+    lightrag_instance = LightRAG(
+        working_dir=lightrag_working_dir,
+        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        ),
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key="your-api-key",
+                base_url="your-base-url",
+            ),
+        ),
+    )
+
+    # 初始化存储(如果有现有数据，这将加载现有数据)
+    await lightrag_instance.initialize_storages()
+
+    # 现在使用现有的 LightRAG 实例初始化 RAGAnything
+    rag = RAGAnything(
+        lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
+        # 仅需要视觉模型用于多模态处理
+        vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+            "gpt-4o",
+            "",
+            system_prompt=None,
+            history_messages=[],
+            messages=[
+                msg
+                for msg in (
+                    {"role": "system", "content": system_prompt} if system_prompt else None,
+                    {"role": "user", "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
+                    ]} if image_data else {"role": "user", "content": prompt},
+                )
+                if msg is not None  # 跳过为 None 的 system 消息
+            ],
+            api_key="your-api-key",
+            **kwargs,
+        ) if image_data else openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        )
+        # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
+    )
+
+    # 查询现有的知识库
+    result = await rag.query_with_multimodal(
+        "What data has been processed in this LightRAG instance?",
+        mode="hybrid"
+    )
+    print("Query result:", result)
+
+    # 向现有的 LightRAG 实例添加新的多模态文档
+    await rag.process_document_complete(
+        file_path="path/to/new/multimodal_document.pdf",
+        output_dir="./output"
+    )
+
+if __name__ == "__main__":
+    asyncio.run(load_existing_lightrag())
+```
+
+</details>
 如需详细文档和高级用法，请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
diff --git a/README.md b/README.md
index 5765e469..2c9e08b8 100644
--- a/README.md
+++ b/README.md
@@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
    pip install raganything
    ```
 2. Process multimodal documents:
-   <details>
-   <summary>RAGAnything Usage Example</summary>
-
-   ```python
-   import asyncio
-   from raganything import RAGAnything
-   from lightrag import LightRAG
-   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-   from lightrag.utils import EmbeddingFunc
-   import os
-
-   async def load_existing_lightrag():
-       # First, create or load an existing LightRAG instance
-       lightrag_working_dir = "./existing_lightrag_storage"
-
-       # Check if previous LightRAG instance exists
-       if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
-           print("✅ Found existing LightRAG instance, loading...")
-       else:
-           print("❌ No existing LightRAG instance found, will create new one")
-
-       # Create/Load LightRAG instance with your configurations
-       lightrag_instance = LightRAG(
-           working_dir=lightrag_working_dir,
-           llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-               "gpt-4o-mini",
-               prompt,
-               system_prompt=system_prompt,
-               history_messages=history_messages,
-               api_key="your-api-key",
-               **kwargs,
-           ),
-           embedding_func=EmbeddingFunc(
-               embedding_dim=3072,
-               max_token_size=8192,
-               func=lambda texts: openai_embed(
-                   texts,
-                   model="text-embedding-3-large",
-                   api_key=api_key,
-                   base_url=base_url,
-               ),
-           )
-       )
-
-       # Initialize storage (this will load existing data if available)
-       await lightrag_instance.initialize_storages()
-
-       # Now initialize RAGAnything with the existing LightRAG instance
-       rag = RAGAnything(
-           lightrag=lightrag_instance,  # Pass the existing LightRAG instance
-           # Only need vision model for multimodal processing
-           vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
-               "gpt-4o",
-               "",
-               system_prompt=None,
-               history_messages=[],
-               messages=[
-                   {"role": "system", "content": system_prompt} if system_prompt else None,
-                   {"role": "user", "content": [
-                       {"type": "text", "text": prompt},
-                       {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
-                   ]} if image_data else {"role": "user", "content": prompt}
-               ],
-               api_key="your-api-key",
-               **kwargs,
-           ) if image_data else openai_complete_if_cache(
-               "gpt-4o-mini",
-               prompt,
-               system_prompt=system_prompt,
-               history_messages=history_messages,
-               api_key="your-api-key",
-               **kwargs,
-           )
-           # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
-       )
-
-       # Query the existing knowledge base
-       result = await rag.query_with_multimodal(
-           "What data has been processed in this LightRAG instance?",
-           mode="hybrid"
-       )
-       print("Query result:", result)
-
-       # Add new multimodal documents to the existing LightRAG instance
-       await rag.process_document_complete(
-           file_path="path/to/new/multimodal_document.pdf",
-           output_dir="./output"
-       )
-
-   if __name__ == "__main__":
-       asyncio.run(load_existing_lightrag())
-   ```
-
-   </details>
+<details>
+<summary>RAGAnything Usage Example</summary>
+
+```python
+import asyncio
+from raganything import RAGAnything
+from lightrag import LightRAG
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+import os
+
+async def load_existing_lightrag():
+    # First, create or load an existing LightRAG instance
+    lightrag_working_dir = "./existing_lightrag_storage"
+
+    # Check if previous LightRAG instance exists
+    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+        print("✅ Found existing LightRAG instance, loading...")
+    else:
+        print("❌ No existing LightRAG instance found, will create new one")
+
+    # Create/Load LightRAG instance with your configurations
+    lightrag_instance = LightRAG(
+        working_dir=lightrag_working_dir,
+        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        ),
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key="your-api-key",
+                base_url="your-base-url",
+            ),
+        ),
+    )
+
+    # Initialize storage (this will load existing data if available)
+    await lightrag_instance.initialize_storages()
+
+    # Now initialize RAGAnything with the existing LightRAG instance
+    rag = RAGAnything(
+        lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+        # Only need vision model for multimodal processing
+        vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+            "gpt-4o",
+            "",
+            system_prompt=None,
+            history_messages=[],
+            messages=[
+                msg
+                for msg in (
+                    {"role": "system", "content": system_prompt} if system_prompt else None,
+                    {"role": "user", "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
+                    ]} if image_data else {"role": "user", "content": prompt},
+                )
+                if msg is not None  # skip the system message when system_prompt is None
+            ],
+            api_key="your-api-key",
+            **kwargs,
+        ) if image_data else openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        )
+        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
+    )
+
+    # Query the existing knowledge base
+    result = await rag.query_with_multimodal(
+        "What data has been processed in this LightRAG instance?",
+        mode="hybrid"
+    )
+    print("Query result:", result)
+
+    # Add new multimodal documents to the existing LightRAG instance
+    await rag.process_document_complete(
+        file_path="path/to/new/multimodal_document.pdf",
+        output_dir="./output"
+    )
+
+if __name__ == "__main__":
+    asyncio.run(load_existing_lightrag())
+```
+
+</details>
 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
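Both examples above leave the credentials as `"your-api-key"` / `"your-base-url"` placeholders, yet the embedding and completion helpers need concrete values at run time. As a minimal sketch of one way to supply them, assuming credentials live in environment variables (the names `OPENAI_API_KEY` and `OPENAI_BASE_URL` are illustrative choices, not something LightRAG requires):

```python
import os

from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc

# Assumed environment variables: rename these to match your deployment.
api_key = os.environ["OPENAI_API_KEY"]
base_url = os.getenv("OPENAI_BASE_URL")  # None falls back to the default OpenAI endpoint

lightrag_instance = LightRAG(
    working_dir="./existing_lightrag_storage",
    llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
        "gpt-4o-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    ),
    embedding_func=EmbeddingFunc(
        embedding_dim=3072,
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
            base_url=base_url,
        ),
    ),
)
```

Both `openai_complete_if_cache` and `openai_embed` accept `api_key` and `base_url` arguments, so nothing else in the examples needs to change.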