Update README

This commit is contained in:
yangdx 2025-07-09 15:17:05 +08:00
parent feb30d8987
commit bfa0844ecb
2 changed files with 42 additions and 18 deletions

View file

@ -824,7 +824,7 @@ rag = LightRAG(
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype)); create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties); CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx; ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-- 如有必要可以删除 -- 如有必要可以删除
drop INDEX entity_p_idx; drop INDEX entity_p_idx;
drop INDEX vertex_p_idx; drop INDEX vertex_p_idx;
@ -849,6 +849,18 @@ rag = LightRAG(
</details> </details>
### LightRAG实例间的数据隔离
通过 workspace 参数可以实现不同LightRAG实例之间的存储数据隔离。LightRAG在初始化后workspace就已经确定,之后修改workspace是无效的。下面是不同类型的存储实现工作空间的方式:
- **对于本地基于文件的数据库,数据隔离通过工作空间子目录实现:** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。
- **对于将数据存储在集合(collection)中的数据库,通过在集合名称前添加工作空间前缀来实现:** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。
- **对于关系型数据库,数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离:** PGKVStorage, PGVectorStorage, PGDocStatusStorage。
- **对于Neo4j图数据库,通过label来实现数据的逻辑隔离:** Neo4JStorage。
为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
## 编辑实体和关系 ## 编辑实体和关系
LightRAG现在支持全面的知识图谱管理功能允许您在知识图谱中创建、编辑和删除实体和关系。 LightRAG现在支持全面的知识图谱管理功能允许您在知识图谱中创建、编辑和删除实体和关系。
@ -1170,17 +1182,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc from lightrag.utils import EmbeddingFunc
import os import os
async def load_existing_lightrag(): async def load_existing_lightrag():
# 首先,创建或加载现有的 LightRAG 实例 # 首先,创建或加载现有的 LightRAG 实例
lightrag_working_dir = "./existing_lightrag_storage" lightrag_working_dir = "./existing_lightrag_storage"
# 检查是否存在之前的 LightRAG 实例 # 检查是否存在之前的 LightRAG 实例
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
print("✅ Found existing LightRAG instance, loading...") print("✅ Found existing LightRAG instance, loading...")
else: else:
print("❌ No existing LightRAG instance found, will create new one") print("❌ No existing LightRAG instance found, will create new one")
# 使用您的配置创建/加载 LightRAG 实例 # 使用您的配置创建/加载 LightRAG 实例
lightrag_instance = LightRAG( lightrag_instance = LightRAG(
working_dir=lightrag_working_dir, working_dir=lightrag_working_dir,
@ -1203,10 +1215,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
), ),
) )
) )
# 初始化存储(如果有现有数据,这将加载现有数据) # 初始化存储(如果有现有数据,这将加载现有数据)
await lightrag_instance.initialize_storages() await lightrag_instance.initialize_storages()
# 现在使用现有的 LightRAG 实例初始化 RAGAnything # 现在使用现有的 LightRAG 实例初始化 RAGAnything
rag = RAGAnything( rag = RAGAnything(
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例 lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
@ -1235,20 +1247,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
) )
# 注意working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承 # 注意working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
) )
# 查询现有的知识库 # 查询现有的知识库
result = await rag.query_with_multimodal( result = await rag.query_with_multimodal(
"What data has been processed in this LightRAG instance?", "What data has been processed in this LightRAG instance?",
mode="hybrid" mode="hybrid"
) )
print("Query result:", result) print("Query result:", result)
# 向现有的 LightRAG 实例添加新的多模态文档 # 向现有的 LightRAG 实例添加新的多模态文档
await rag.process_document_complete( await rag.process_document_complete(
file_path="path/to/new/multimodal_document.pdf", file_path="path/to/new/multimodal_document.pdf",
output_dir="./output" output_dir="./output"
) )
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(load_existing_lightrag()) asyncio.run(load_existing_lightrag())
``` ```

View file

@ -239,6 +239,7 @@ A full list of LightRAG init parameters:
| **Parameter** | **Type** | **Explanation** | **Default** | | **Parameter** | **Type** | **Explanation** | **Default** |
|--------------|----------|-----------------|-------------| |--------------|----------|-----------------|-------------|
| **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` | | **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
| **workspace** | `str` | Workspace name for data isolation between different LightRAG Instances | |
| **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` | | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
| **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` | | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` | | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
@ -796,7 +797,7 @@ For production level scenarios you will most likely want to leverage an enterpri
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype)); create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties); CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx; ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
-- drop if necessary -- drop if necessary
drop INDEX entity_p_idx; drop INDEX entity_p_idx;
drop INDEX vertex_p_idx; drop INDEX vertex_p_idx;
@ -895,6 +896,17 @@ async def initialize_rag():
</details> </details>
### Data Isolation Between LightRAG Instances
The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage:
- **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`.
- **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `QdrantVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`.
- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
- **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`.
To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
## Edit Entities and Relations ## Edit Entities and Relations
LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph. LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
@ -1219,17 +1231,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc from lightrag.utils import EmbeddingFunc
import os import os
async def load_existing_lightrag(): async def load_existing_lightrag():
# First, create or load an existing LightRAG instance # First, create or load an existing LightRAG instance
lightrag_working_dir = "./existing_lightrag_storage" lightrag_working_dir = "./existing_lightrag_storage"
# Check if previous LightRAG instance exists # Check if previous LightRAG instance exists
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
print("✅ Found existing LightRAG instance, loading...") print("✅ Found existing LightRAG instance, loading...")
else: else:
print("❌ No existing LightRAG instance found, will create new one") print("❌ No existing LightRAG instance found, will create new one")
# Create/Load LightRAG instance with your configurations # Create/Load LightRAG instance with your configurations
lightrag_instance = LightRAG( lightrag_instance = LightRAG(
working_dir=lightrag_working_dir, working_dir=lightrag_working_dir,
@ -1252,10 +1264,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
), ),
) )
) )
# Initialize storage (this will load existing data if available) # Initialize storage (this will load existing data if available)
await lightrag_instance.initialize_storages() await lightrag_instance.initialize_storages()
# Now initialize RAGAnything with the existing LightRAG instance # Now initialize RAGAnything with the existing LightRAG instance
rag = RAGAnything( rag = RAGAnything(
lightrag=lightrag_instance, # Pass the existing LightRAG instance lightrag=lightrag_instance, # Pass the existing LightRAG instance
@ -1284,20 +1296,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
) )
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
) )
# Query the existing knowledge base # Query the existing knowledge base
result = await rag.query_with_multimodal( result = await rag.query_with_multimodal(
"What data has been processed in this LightRAG instance?", "What data has been processed in this LightRAG instance?",
mode="hybrid" mode="hybrid"
) )
print("Query result:", result) print("Query result:", result)
# Add new multimodal documents to the existing LightRAG instance # Add new multimodal documents to the existing LightRAG instance
await rag.process_document_complete( await rag.process_document_complete(
file_path="path/to/new/multimodal_document.pdf", file_path="path/to/new/multimodal_document.pdf",
output_dir="./output" output_dir="./output"
) )
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(load_existing_lightrag()) asyncio.run(load_existing_lightrag())
``` ```