Update README

2025-06-26 16:17:00 +08:00 · 2025-06-26 16:17:00 +08:00 · fc7a0329df
commit fc7a0329df
parent 145e3a238b
2 changed files with 169 additions and 112 deletions
--- a/README-zh.md
+++ b/README-zh.md
@@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
   pip install raganything
   ```
 2. 处理多模态文档：
-   ```python
+    <details>
-   import asyncio
+    <summary> <b> RAGAnything 使用实例 </b></summary>
-   from raganything import RAGAnything
+        ```python
-   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+        import asyncio
        from raganything import RAGAnything
        from lightrag import LightRAG
        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
        from lightrag.utils import EmbeddingFunc
        import os
-   async def main():
+        async def load_existing_lightrag():
-       # 使用LightRAG集成初始化RAGAnything
+            # 首先，创建或加载现有的 LightRAG 实例
-       rag = RAGAnything(
+            lightrag_working_dir = "./existing_lightrag_storage"
           working_dir="./rag_storage",
           llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
               "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
           ),
           embedding_func=lambda texts: openai_embed(
               texts, model="text-embedding-3-large", api_key="your-api-key"
           ),
           embedding_dim=3072,
       )
-       # 处理多模态文档
+            # 检查是否存在之前的 LightRAG 实例
-       await rag.process_document_complete(
+            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
-           file_path="path/to/your/document.pdf",
+                print("✅ Found existing LightRAG instance, loading...")
-           output_dir="./output"
+            else:
-       )
+                print("❌ No existing LightRAG instance found, will create new one")
-       # 查询多模态内容
+            # 使用您的配置创建/加载 LightRAG 实例
-       result = await rag.query_with_multimodal(
+            lightrag_instance = LightRAG(
-           "图表中显示的主要发现是什么？",
+                working_dir=lightrag_working_dir,
-           mode="hybrid"
+                llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-       )
+                    "gpt-4o-mini",
-       print(result)
+                    prompt,
                    system_prompt=system_prompt,
                    history_messages=history_messages,
                    api_key="your-api-key",
                    **kwargs,
                ),
                embedding_func=EmbeddingFunc(
                    embedding_dim=3072,
                    max_token_size=8192,
                    func=lambda texts: openai_embed(
                        texts,
                        model="text-embedding-3-large",
                        api_key=api_key,
                        base_url=base_url,
                    ),
                )
            )
-   if __name__ == "__main__":
+            # 初始化存储（如果有现有数据，这将加载现有数据）
-       asyncio.run(main())
+            await lightrag_instance.initialize_storages()
-   ```
+
            # 现在使用现有的 LightRAG 实例初始化 RAGAnything
            rag = RAGAnything(
                lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
                # 仅需要视觉模型用于多模态处理
                vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
                    "gpt-4o",
                    "",
                    system_prompt=None,
                    history_messages=[],
                    messages=[
                        {"role": "system", "content": system_prompt} if system_prompt else None,
                        {"role": "user", "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
                        ]} if image_data else {"role": "user", "content": prompt}
                    ],
                    api_key="your-api-key",
                    **kwargs,
                ) if image_data else openai_complete_if_cache(
                    "gpt-4o-mini",
                    prompt,
                    system_prompt=system_prompt,
                    history_messages=history_messages,
                    api_key="your-api-key",
                    **kwargs,
                )
                # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
            )
            # 查询现有的知识库
            result = await rag.query_with_multimodal(
                "What data has been processed in this LightRAG instance?",
                mode="hybrid"
            )
            print("Query result:", result)
            # 向现有的 LightRAG 实例添加新的多模态文档
            await rag.process_document_complete(
                file_path="path/to/new/multimodal_document.pdf",
                output_dir="./output"
            )
        if __name__ == "__main__":
            asyncio.run(load_existing_lightrag())
        ```
    </details>
 如需详细文档和高级用法，请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
--- a/README.md
+++ b/README.md
@@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
   pip install raganything
   ```
 2. Process multimodal documents:
    <details>
    <summary> <b> RAGAnything Usage Example </b></summary>
        ```python
        import asyncio
        from raganything import RAGAnything
        from lightrag import LightRAG
        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
        from lightrag.utils import EmbeddingFunc
        import os
-<details>
+        async def load_existing_lightrag():
-  <summary> <b> RAGAnything Usage Example </b></summary>
+            # First, create or load an existing LightRAG instance
-    ```python
+            lightrag_working_dir = "./existing_lightrag_storage"
    import asyncio
    from raganything import RAGAnything
    from lightrag import LightRAG
    from lightrag.llm.openai import openai_complete_if_cache, openai_embed
    from lightrag.utils import EmbeddingFunc
    import os
-    async def load_existing_lightrag():
+            # Check if previous LightRAG instance exists
-        # First, create or load an existing LightRAG instance
+            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
-        lightrag_working_dir = "./existing_lightrag_storage"
+                print("✅ Found existing LightRAG instance, loading...")
            else:
                print("❌ No existing LightRAG instance found, will create new one")
-        # Check if previous LightRAG instance exists
+            # Create/Load LightRAG instance with your configurations
-        if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+            lightrag_instance = LightRAG(
-            print("✅ Found existing LightRAG instance, loading...")
+                working_dir=lightrag_working_dir,
-        else:
+                llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            print("❌ No existing LightRAG instance found, will create new one")
+                    "gpt-4o-mini",
-
+                    prompt,
-        # Create/Load LightRAG instance with your configurations
+                    system_prompt=system_prompt,
-        lightrag_instance = LightRAG(
+                    history_messages=history_messages,
-            working_dir=lightrag_working_dir,
+                    api_key="your-api-key",
-            llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+                    **kwargs,
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages,
                api_key="your-api-key",
                **kwargs,
            ),
            embedding_func=EmbeddingFunc(
                embedding_dim=3072,
                max_token_size=8192,
                func=lambda texts: openai_embed(
                    texts,
                    model="text-embedding-3-large",
                    api_key=api_key,
                    base_url=base_url,
                ),
                embedding_func=EmbeddingFunc(
                    embedding_dim=3072,
                    max_token_size=8192,
                    func=lambda texts: openai_embed(
                        texts,
                        model="text-embedding-3-large",
                        api_key=api_key,
                        base_url=base_url,
                    ),
                )
            )
        )
-        # Initialize storage (this will load existing data if available)
+            # Initialize storage (this will load existing data if available)
-        await lightrag_instance.initialize_storages()
+            await lightrag_instance.initialize_storages()
-        # Now initialize RAGAnything with the existing LightRAG instance
+            # Now initialize RAGAnything with the existing LightRAG instance
-        rag = RAGAnything(
+            rag = RAGAnything(
-            lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+                lightrag=lightrag_instance,  # Pass the existing LightRAG instance
-            # Only need vision model for multimodal processing
+                # Only need vision model for multimodal processing
-            vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+                vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
-                "gpt-4o",
+                    "gpt-4o",
-                "",
+                    "",
-                system_prompt=None,
+                    system_prompt=None,
-                history_messages=[],
+                    history_messages=[],
-                messages=[
+                    messages=[
-                    {"role": "system", "content": system_prompt} if system_prompt else None,
+                        {"role": "system", "content": system_prompt} if system_prompt else None,
-                    {"role": "user", "content": [
+                        {"role": "user", "content": [
-                        {"type": "text", "text": prompt},
+                            {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
+                            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
-                    ]} if image_data else {"role": "user", "content": prompt}
+                        ]} if image_data else {"role": "user", "content": prompt}
-                ],
+                    ],
-                api_key="your-api-key",
+                    api_key="your-api-key",
-                **kwargs,
+                    **kwargs,
-            ) if image_data else openai_complete_if_cache(
+                ) if image_data else openai_complete_if_cache(
-                "gpt-4o-mini",
+                    "gpt-4o-mini",
-                prompt,
+                    prompt,
-                system_prompt=system_prompt,
+                    system_prompt=system_prompt,
-                history_messages=history_messages,
+                    history_messages=history_messages,
-                api_key="your-api-key",
+                    api_key="your-api-key",
-                **kwargs,
+                    **kwargs,
                )
                # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
            )
            # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
        )
-        # Query the existing knowledge base
+            # Query the existing knowledge base
-        result = await rag.query_with_multimodal(
+            result = await rag.query_with_multimodal(
-            "What data has been processed in this LightRAG instance?",
+                "What data has been processed in this LightRAG instance?",
-            mode="hybrid"
+                mode="hybrid"
-        )
+            )
-        print("Query result:", result)
+            print("Query result:", result)
-        # Add new multimodal documents to the existing LightRAG instance
+            # Add new multimodal documents to the existing LightRAG instance
-        await rag.process_document_complete(
+            await rag.process_document_complete(
-            file_path="path/to/new/multimodal_document.pdf",
+                file_path="path/to/new/multimodal_document.pdf",
-            output_dir="./output"
+                output_dir="./output"
-        )
+            )
-    if __name__ == "__main__":
+        if __name__ == "__main__":
-        asyncio.run(load_existing_lightrag())
+            asyncio.run(load_existing_lightrag())
-    ```
+        ```
-</details>
+    </details>
 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).