Update README
This commit is contained in:
parent
145e3a238b
commit
fc7a0329df
2 changed files with 169 additions and 112 deletions
118
README-zh.md
118
README-zh.md
|
|
@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
||||||
pip install raganything
|
pip install raganything
|
||||||
```
|
```
|
||||||
2. 处理多模态文档:
|
2. 处理多模态文档:
|
||||||
```python
|
<details>
|
||||||
import asyncio
|
<summary> <b> RAGAnything 使用实例 </b></summary>
|
||||||
from raganything import RAGAnything
|
```python
|
||||||
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
import asyncio
|
||||||
|
from raganything import RAGAnything
|
||||||
|
from lightrag import LightRAG
|
||||||
|
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
||||||
|
from lightrag.utils import EmbeddingFunc
|
||||||
|
import os
|
||||||
|
|
||||||
async def main():
|
async def load_existing_lightrag():
|
||||||
# 使用LightRAG集成初始化RAGAnything
|
# 首先,创建或加载现有的 LightRAG 实例
|
||||||
rag = RAGAnything(
|
lightrag_working_dir = "./existing_lightrag_storage"
|
||||||
working_dir="./rag_storage",
|
|
||||||
llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
|
|
||||||
"gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
|
|
||||||
),
|
|
||||||
embedding_func=lambda texts: openai_embed(
|
|
||||||
texts, model="text-embedding-3-large", api_key="your-api-key"
|
|
||||||
),
|
|
||||||
embedding_dim=3072,
|
|
||||||
)
|
|
||||||
|
|
||||||
# 处理多模态文档
|
# 检查是否存在之前的 LightRAG 实例
|
||||||
await rag.process_document_complete(
|
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
||||||
file_path="path/to/your/document.pdf",
|
print("✅ Found existing LightRAG instance, loading...")
|
||||||
output_dir="./output"
|
else:
|
||||||
)
|
print("❌ No existing LightRAG instance found, will create new one")
|
||||||
|
|
||||||
# 查询多模态内容
|
# 使用您的配置创建/加载 LightRAG 实例
|
||||||
result = await rag.query_with_multimodal(
|
lightrag_instance = LightRAG(
|
||||||
"图表中显示的主要发现是什么?",
|
working_dir=lightrag_working_dir,
|
||||||
mode="hybrid"
|
llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
|
||||||
)
|
"gpt-4o-mini",
|
||||||
print(result)
|
prompt,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
history_messages=history_messages,
|
||||||
|
api_key="your-api-key",
|
||||||
|
**kwargs,
|
||||||
|
),
|
||||||
|
embedding_func=EmbeddingFunc(
|
||||||
|
embedding_dim=3072,
|
||||||
|
max_token_size=8192,
|
||||||
|
func=lambda texts: openai_embed(
|
||||||
|
texts,
|
||||||
|
model="text-embedding-3-large",
|
||||||
|
api_key=api_key,
|
||||||
|
base_url=base_url,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# 初始化存储(如果有现有数据,这将加载现有数据)
|
||||||
asyncio.run(main())
|
await lightrag_instance.initialize_storages()
|
||||||
```
|
|
||||||
|
# 现在使用现有的 LightRAG 实例初始化 RAGAnything
|
||||||
|
rag = RAGAnything(
|
||||||
|
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
|
||||||
|
# 仅需要视觉模型用于多模态处理
|
||||||
|
vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
|
||||||
|
"gpt-4o",
|
||||||
|
"",
|
||||||
|
system_prompt=None,
|
||||||
|
history_messages=[],
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": system_prompt} if system_prompt else None,
|
||||||
|
{"role": "user", "content": [
|
||||||
|
{"type": "text", "text": prompt},
|
||||||
|
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
|
||||||
|
]} if image_data else {"role": "user", "content": prompt}
|
||||||
|
],
|
||||||
|
api_key="your-api-key",
|
||||||
|
**kwargs,
|
||||||
|
) if image_data else openai_complete_if_cache(
|
||||||
|
"gpt-4o-mini",
|
||||||
|
prompt,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
history_messages=history_messages,
|
||||||
|
api_key="your-api-key",
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
# 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
|
||||||
|
)
|
||||||
|
|
||||||
|
# 查询现有的知识库
|
||||||
|
result = await rag.query_with_multimodal(
|
||||||
|
"What data has been processed in this LightRAG instance?",
|
||||||
|
mode="hybrid"
|
||||||
|
)
|
||||||
|
print("Query result:", result)
|
||||||
|
|
||||||
|
# 向现有的 LightRAG 实例添加新的多模态文档
|
||||||
|
await rag.process_document_complete(
|
||||||
|
file_path="path/to/new/multimodal_document.pdf",
|
||||||
|
output_dir="./output"
|
||||||
|
)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(load_existing_lightrag())
|
||||||
|
```
|
||||||
|
</details>
|
||||||
|
|
||||||
如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
|
如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
|
||||||
|
|
||||||
|
|
|
||||||
163
README.md
163
README.md
|
|
@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
||||||
pip install raganything
|
pip install raganything
|
||||||
```
|
```
|
||||||
2. Process multimodal documents:
|
2. Process multimodal documents:
|
||||||
|
<details>
|
||||||
|
<summary> <b> RAGAnything Usage Example </b></summary>
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
from raganything import RAGAnything
|
||||||
|
from lightrag import LightRAG
|
||||||
|
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
||||||
|
from lightrag.utils import EmbeddingFunc
|
||||||
|
import os
|
||||||
|
|
||||||
<details>
|
async def load_existing_lightrag():
|
||||||
<summary> <b> RAGAnything Usage Example </b></summary>
|
# First, create or load an existing LightRAG instance
|
||||||
```python
|
lightrag_working_dir = "./existing_lightrag_storage"
|
||||||
import asyncio
|
|
||||||
from raganything import RAGAnything
|
|
||||||
from lightrag import LightRAG
|
|
||||||
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
|
||||||
from lightrag.utils import EmbeddingFunc
|
|
||||||
import os
|
|
||||||
|
|
||||||
async def load_existing_lightrag():
|
# Check if previous LightRAG instance exists
|
||||||
# First, create or load an existing LightRAG instance
|
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
||||||
lightrag_working_dir = "./existing_lightrag_storage"
|
print("✅ Found existing LightRAG instance, loading...")
|
||||||
|
else:
|
||||||
|
print("❌ No existing LightRAG instance found, will create new one")
|
||||||
|
|
||||||
# Check if previous LightRAG instance exists
|
# Create/Load LightRAG instance with your configurations
|
||||||
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
lightrag_instance = LightRAG(
|
||||||
print("✅ Found existing LightRAG instance, loading...")
|
working_dir=lightrag_working_dir,
|
||||||
else:
|
llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
|
||||||
print("❌ No existing LightRAG instance found, will create new one")
|
"gpt-4o-mini",
|
||||||
|
prompt,
|
||||||
# Create/Load LightRAG instance with your configurations
|
system_prompt=system_prompt,
|
||||||
lightrag_instance = LightRAG(
|
history_messages=history_messages,
|
||||||
working_dir=lightrag_working_dir,
|
api_key="your-api-key",
|
||||||
llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
|
**kwargs,
|
||||||
"gpt-4o-mini",
|
|
||||||
prompt,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
history_messages=history_messages,
|
|
||||||
api_key="your-api-key",
|
|
||||||
**kwargs,
|
|
||||||
),
|
|
||||||
embedding_func=EmbeddingFunc(
|
|
||||||
embedding_dim=3072,
|
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: openai_embed(
|
|
||||||
texts,
|
|
||||||
model="text-embedding-3-large",
|
|
||||||
api_key=api_key,
|
|
||||||
base_url=base_url,
|
|
||||||
),
|
),
|
||||||
|
embedding_func=EmbeddingFunc(
|
||||||
|
embedding_dim=3072,
|
||||||
|
max_token_size=8192,
|
||||||
|
func=lambda texts: openai_embed(
|
||||||
|
texts,
|
||||||
|
model="text-embedding-3-large",
|
||||||
|
api_key=api_key,
|
||||||
|
base_url=base_url,
|
||||||
|
),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize storage (this will load existing data if available)
|
# Initialize storage (this will load existing data if available)
|
||||||
await lightrag_instance.initialize_storages()
|
await lightrag_instance.initialize_storages()
|
||||||
|
|
||||||
# Now initialize RAGAnything with the existing LightRAG instance
|
# Now initialize RAGAnything with the existing LightRAG instance
|
||||||
rag = RAGAnything(
|
rag = RAGAnything(
|
||||||
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
||||||
# Only need vision model for multimodal processing
|
# Only need vision model for multimodal processing
|
||||||
vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
|
vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
|
||||||
"gpt-4o",
|
"gpt-4o",
|
||||||
"",
|
"",
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
history_messages=[],
|
history_messages=[],
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": system_prompt} if system_prompt else None,
|
{"role": "system", "content": system_prompt} if system_prompt else None,
|
||||||
{"role": "user", "content": [
|
{"role": "user", "content": [
|
||||||
{"type": "text", "text": prompt},
|
{"type": "text", "text": prompt},
|
||||||
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
|
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
|
||||||
]} if image_data else {"role": "user", "content": prompt}
|
]} if image_data else {"role": "user", "content": prompt}
|
||||||
],
|
],
|
||||||
api_key="your-api-key",
|
api_key="your-api-key",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) if image_data else openai_complete_if_cache(
|
) if image_data else openai_complete_if_cache(
|
||||||
"gpt-4o-mini",
|
"gpt-4o-mini",
|
||||||
prompt,
|
prompt,
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
history_messages=history_messages,
|
history_messages=history_messages,
|
||||||
api_key="your-api-key",
|
api_key="your-api-key",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
|
)
|
||||||
|
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
||||||
)
|
)
|
||||||
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query the existing knowledge base
|
# Query the existing knowledge base
|
||||||
result = await rag.query_with_multimodal(
|
result = await rag.query_with_multimodal(
|
||||||
"What data has been processed in this LightRAG instance?",
|
"What data has been processed in this LightRAG instance?",
|
||||||
mode="hybrid"
|
mode="hybrid"
|
||||||
)
|
)
|
||||||
print("Query result:", result)
|
print("Query result:", result)
|
||||||
|
|
||||||
# Add new multimodal documents to the existing LightRAG instance
|
# Add new multimodal documents to the existing LightRAG instance
|
||||||
await rag.process_document_complete(
|
await rag.process_document_complete(
|
||||||
file_path="path/to/new/multimodal_document.pdf",
|
file_path="path/to/new/multimodal_document.pdf",
|
||||||
output_dir="./output"
|
output_dir="./output"
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(load_existing_lightrag())
|
asyncio.run(load_existing_lightrag())
|
||||||
```
|
```
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue