Update README

2025-06-26 16:17:00 +08:00 · 2025-06-26 16:17:00 +08:00 · fc7a0329df
commit fc7a0329df
parent 145e3a238b
2 changed files with 169 additions and 112 deletions
--- a/README-zh.md
+++ b/README-zh.md
@@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
   pip install raganything
   ```
 2. 处理多模态文档：
-   ```python
-   import asyncio
-   from raganything import RAGAnything
-   from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+    <details>
+    <summary> <b> RAGAnything 使用示例 </b></summary>
+        ```python
+        import asyncio
+        from raganything import RAGAnything
+        from lightrag import LightRAG
+        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+        from lightrag.utils import EmbeddingFunc
+        import os

-   async def main():
-       # 使用LightRAG集成初始化RAGAnything
-       rag = RAGAnything(
-           working_dir="./rag_storage",
-           llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
-               "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
-           ),
-           embedding_func=lambda texts: openai_embed(
-               texts, model="text-embedding-3-large", api_key="your-api-key"
-           ),
-           embedding_dim=3072,
-       )
+        async def load_existing_lightrag():
+            # 首先，创建或加载现有的 LightRAG 实例
+            lightrag_working_dir = "./existing_lightrag_storage"

-       # 处理多模态文档
-       await rag.process_document_complete(
-           file_path="path/to/your/document.pdf",
-           output_dir="./output"
-       )
+            # 检查是否存在之前的 LightRAG 实例
+            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+                print("✅ Found existing LightRAG instance, loading...")
+            else:
+                print("❌ No existing LightRAG instance found, will create new one")

-       # 查询多模态内容
-       result = await rag.query_with_multimodal(
-           "图表中显示的主要发现是什么？",
-           mode="hybrid"
-       )
-       print(result)
+            # 使用您的配置创建/加载 LightRAG 实例
+            lightrag_instance = LightRAG(
+                working_dir=lightrag_working_dir,
+                llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+                    "gpt-4o-mini",
+                    prompt,
+                    system_prompt=system_prompt,
+                    history_messages=history_messages,
+                    api_key="your-api-key",
+                    **kwargs,
+                ),
+                embedding_func=EmbeddingFunc(
+                    embedding_dim=3072,
+                    max_token_size=8192,
+                    func=lambda texts: openai_embed(
+                        texts,
+                        model="text-embedding-3-large",
+                        api_key="your-api-key",
+                        base_url=None,
+                    ),
+                )
+            )

-   if __name__ == "__main__":
-       asyncio.run(main())
-   ```
+            # 初始化存储（如果有现有数据，这将加载现有数据）
+            await lightrag_instance.initialize_storages()
+
+            # 现在使用现有的 LightRAG 实例初始化 RAGAnything
+            rag = RAGAnything(
+                lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
+                # 仅需要视觉模型用于多模态处理
+                vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+                    "gpt-4o",
+                    "",
+                    system_prompt=None,
+                    history_messages=[],
+                    messages=[
+                        {"role": "system", "content": system_prompt} if system_prompt else None,
+                        {"role": "user", "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
+                        ]} if image_data else {"role": "user", "content": prompt}
+                    ],
+                    api_key="your-api-key",
+                    **kwargs,
+                ) if image_data else openai_complete_if_cache(
+                    "gpt-4o-mini",
+                    prompt,
+                    system_prompt=system_prompt,
+                    history_messages=history_messages,
+                    api_key="your-api-key",
+                    **kwargs,
+                )
+                # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
+            )
+
+            # 查询现有的知识库
+            result = await rag.query_with_multimodal(
+                "What data has been processed in this LightRAG instance?",
+                mode="hybrid"
+            )
+            print("Query result:", result)
+
+            # 向现有的 LightRAG 实例添加新的多模态文档
+            await rag.process_document_complete(
+                file_path="path/to/new/multimodal_document.pdf",
+                output_dir="./output"
+            )
+
+        if __name__ == "__main__":
+            asyncio.run(load_existing_lightrag())
+        ```
+    </details>

 如需详细文档和高级用法，请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。

--- a/README.md
+++ b/README.md
@@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
   pip install raganything
   ```
 2. Process multimodal documents:
+    <details>
+    <summary> <b> RAGAnything Usage Example </b></summary>
+        ```python
+        import asyncio
+        from raganything import RAGAnything
+        from lightrag import LightRAG
+        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+        from lightrag.utils import EmbeddingFunc
+        import os

-<details>
-  <summary> <b> RAGAnything Usage Example </b></summary>
-    ```python
-    import asyncio
-    from raganything import RAGAnything
-    from lightrag import LightRAG
-    from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-    from lightrag.utils import EmbeddingFunc
-    import os
+        async def load_existing_lightrag():
+            # First, create or load an existing LightRAG instance
+            lightrag_working_dir = "./existing_lightrag_storage"

-    async def load_existing_lightrag():
-        # First, create or load an existing LightRAG instance
-        lightrag_working_dir = "./existing_lightrag_storage"
+            # Check if previous LightRAG instance exists
+            if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+                print("✅ Found existing LightRAG instance, loading...")
+            else:
+                print("❌ No existing LightRAG instance found, will create new one")

-        # Check if previous LightRAG instance exists
-        if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
-            print("✅ Found existing LightRAG instance, loading...")
-        else:
-            print("❌ No existing LightRAG instance found, will create new one")
-
-        # Create/Load LightRAG instance with your configurations
-        lightrag_instance = LightRAG(
-            working_dir=lightrag_working_dir,
-            llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-                "gpt-4o-mini",
-                prompt,
-                system_prompt=system_prompt,
-                history_messages=history_messages,
-                api_key="your-api-key",
-                **kwargs,
-            ),
-            embedding_func=EmbeddingFunc(
-                embedding_dim=3072,
-                max_token_size=8192,
-                func=lambda texts: openai_embed(
-                    texts,
-                    model="text-embedding-3-large",
-                    api_key=api_key,
-                    base_url=base_url,
+            # Create/Load LightRAG instance with your configurations
+            lightrag_instance = LightRAG(
+                working_dir=lightrag_working_dir,
+                llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+                    "gpt-4o-mini",
+                    prompt,
+                    system_prompt=system_prompt,
+                    history_messages=history_messages,
+                    api_key="your-api-key",
+                    **kwargs,
                ),
+                embedding_func=EmbeddingFunc(
+                    embedding_dim=3072,
+                    max_token_size=8192,
+                    func=lambda texts: openai_embed(
+                        texts,
+                        model="text-embedding-3-large",
+                        api_key="your-api-key",
+                        base_url=None,
+                    ),
+                )
            )
-        )

-        # Initialize storage (this will load existing data if available)
-        await lightrag_instance.initialize_storages()
+            # Initialize storage (this will load existing data if available)
+            await lightrag_instance.initialize_storages()

-        # Now initialize RAGAnything with the existing LightRAG instance
-        rag = RAGAnything(
-            lightrag=lightrag_instance,  # Pass the existing LightRAG instance
-            # Only need vision model for multimodal processing
-            vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
-                "gpt-4o",
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt} if system_prompt else None,
-                    {"role": "user", "content": [
-                        {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
-                    ]} if image_data else {"role": "user", "content": prompt}
-                ],
-                api_key="your-api-key",
-                **kwargs,
-            ) if image_data else openai_complete_if_cache(
-                "gpt-4o-mini",
-                prompt,
-                system_prompt=system_prompt,
-                history_messages=history_messages,
-                api_key="your-api-key",
-                **kwargs,
+            # Now initialize RAGAnything with the existing LightRAG instance
+            rag = RAGAnything(
+                lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+                # Only need vision model for multimodal processing
+                vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+                    "gpt-4o",
+                    "",
+                    system_prompt=None,
+                    history_messages=[],
+                    messages=[
+                        {"role": "system", "content": system_prompt} if system_prompt else None,
+                        {"role": "user", "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
+                        ]} if image_data else {"role": "user", "content": prompt}
+                    ],
+                    api_key="your-api-key",
+                    **kwargs,
+                ) if image_data else openai_complete_if_cache(
+                    "gpt-4o-mini",
+                    prompt,
+                    system_prompt=system_prompt,
+                    history_messages=history_messages,
+                    api_key="your-api-key",
+                    **kwargs,
+                )
+                # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
            )
-            # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
-        )

-        # Query the existing knowledge base
-        result = await rag.query_with_multimodal(
-            "What data has been processed in this LightRAG instance?",
-            mode="hybrid"
-        )
-        print("Query result:", result)
+            # Query the existing knowledge base
+            result = await rag.query_with_multimodal(
+                "What data has been processed in this LightRAG instance?",
+                mode="hybrid"
+            )
+            print("Query result:", result)

-        # Add new multimodal documents to the existing LightRAG instance
-        await rag.process_document_complete(
-            file_path="path/to/new/multimodal_document.pdf",
-            output_dir="./output"
-        )
+            # Add new multimodal documents to the existing LightRAG instance
+            await rag.process_document_complete(
+                file_path="path/to/new/multimodal_document.pdf",
+                output_dir="./output"
+            )

-    if __name__ == "__main__":
-        asyncio.run(load_existing_lightrag())
-    ```
-</details>
+        if __name__ == "__main__":
+            asyncio.run(load_existing_lightrag())
+        ```
+    </details>

 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).