commit 0dc11e0794
19 changed files with 641 additions and 732 deletions
.github/workflows/docker-build-manual.yml (vendored, new normal file, 73 lines added)
@@ -0,0 +1,73 @@
name: Build Test Docker Image manually

on:
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch all history for tags

      - name: Get latest tag
        id: get_tag
        run: |
          # Get the latest tag, fallback to commit SHA if no tags exist
          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
            echo "No tags found, using commit SHA: $LATEST_TAG"
          else
            echo "Latest tag found: $LATEST_TAG"
          fi
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
          echo "image_tag=$LATEST_TAG" >> $GITHUB_OUTPUT

      - name: Update version in __init__.py
        run: |
          sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_tag.outputs.tag }}\"/" lightrag/__init__.py
          echo "Updated __init__.py with version ${{ steps.get_tag.outputs.tag }}"
          cat lightrag/__init__.py | grep __version__

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
            type=raw,value=${{ steps.get_tag.outputs.tag }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Output image details
        run: |
          echo "Docker image built and pushed successfully!"
          echo "Image tags:"
          echo "  - ghcr.io/${{ github.repository }}:${{ steps.get_tag.outputs.tag }}"
          echo "Latest Git tag used: ${{ steps.get_tag.outputs.tag }}"
.github/workflows/docker-publish.yml (vendored, 19 lines changed)
|
|
@ -1,4 +1,4 @@
|
|||
name: Build and Push Docker Image
|
||||
name: Build Latest Docker Image on Release
|
||||
|
||||
on:
|
||||
release:
|
||||
|
|
@ -15,6 +15,8 @@ jobs:
|
|||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history for tags
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
|
@ -26,13 +28,26 @@ jobs:
|
|||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Get latest tag
|
||||
id: get_tag
|
||||
run: |
|
||||
TAG=$(git describe --tags --abbrev=0)
|
||||
echo "Found tag: $TAG"
|
||||
echo "tag=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Update version in __init__.py
|
||||
run: |
|
||||
sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_tag.outputs.tag }}\"/" lightrag/__init__.py
|
||||
echo "Updated __init__.py with version ${{ steps.get_tag.outputs.tag }}"
|
||||
cat lightrag/__init__.py | grep __version__
|
||||
|
||||
- name: Extract metadata for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=raw,value=${{ steps.get_tag.outputs.tag }}
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Build and push Docker image
|
||||
.github/workflows/pypi-publish.yml (vendored, 16 lines changed)
|
|
@ -13,11 +13,27 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history for tags
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.x"
|
||||
|
||||
- name: Get version from tag
|
||||
id: get_version
|
||||
run: |
|
||||
TAG=$(git describe --tags --abbrev=0)
|
||||
echo "Found tag: $TAG"
|
||||
echo "Extracted version: $TAG"
|
||||
echo "version=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Update version in __init__.py
|
||||
run: |
|
||||
sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_version.outputs.version }}\"/" lightrag/__init__.py
|
||||
echo "Updated __init__.py with version ${{ steps.get_version.outputs.version }}"
|
||||
cat lightrag/__init__.py | grep __version__
|
||||
|
||||
- name: Build release distributions
|
||||
run: |
|
||||
python -m pip install build
|
||||
Dockerfile (16 lines changed)
|
|
@ -1,8 +1,11 @@
|
|||
# Build stage
|
||||
FROM python:3.11-slim AS builder
|
||||
FROM python:3.12-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Upgrade pip, setuptools and wheel to the latest version
|
||||
RUN pip install --upgrade pip setuptools wheel
|
||||
|
||||
# Install Rust and required build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
|
|
@ -19,8 +22,8 @@ COPY lightrag/ ./lightrag/
|
|||
|
||||
# Install dependencies
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --user --no-cache-dir .
|
||||
RUN pip install --user --no-cache-dir .[api]
|
||||
RUN pip install --user --no-cache-dir --use-pep517 .
|
||||
RUN pip install --user --no-cache-dir --use-pep517 .[api]
|
||||
|
||||
# Install dependencies for default storage
|
||||
RUN pip install --user --no-cache-dir nano-vectordb networkx
|
||||
|
|
@ -30,16 +33,19 @@ RUN pip install --user --no-cache-dir openai ollama tiktoken
|
|||
RUN pip install --user --no-cache-dir pypdf2 python-docx python-pptx openpyxl
|
||||
|
||||
# Final stage
|
||||
FROM python:3.11-slim
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Upgrade pip and setuptools
|
||||
RUN pip install --upgrade pip setuptools wheel
|
||||
|
||||
# Copy only necessary files from builder
|
||||
COPY --from=builder /root/.local /root/.local
|
||||
COPY ./lightrag ./lightrag
|
||||
COPY setup.py .
|
||||
|
||||
RUN pip install ".[api]"
|
||||
RUN pip install --use-pep517 ".[api]"
|
||||
# Make sure scripts in .local are usable
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
README-zh.md (75 lines changed)
|
|
@ -50,7 +50,8 @@
|
|||
|
||||
## 🎉 新闻
|
||||
|
||||
- [X] [2025.06.05]🎯📢LightRAG现已集成RAG-Anything,支持全面的多模态文档解析与RAG能力(PDF、图片、Office文档、表格、公式等)。详见下方[多模态处理模块](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#多模态文档处理rag-anything集成)。
|
||||
- [X] [2025.06.16]🎯📢我们的团队发布了[RAG-Anything](https://github.com/HKUDS/RAG-Anything),一个用于无缝处理文本、图像、表格和方程式的全功能多模态 RAG 系统。
|
||||
- [X] [2025.06.05]🎯📢LightRAG现已集成[RAG-Anything](https://github.com/HKUDS/RAG-Anything),支持全面的多模态文档解析与RAG能力(PDF、图片、Office文档、表格、公式等)。详见下方[多模态处理模块](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#多模态文档处理rag-anything集成)。
|
||||
- [X] [2025.03.18]🎯📢LightRAG现已支持引文功能。
|
||||
- [X] [2025.02.05]🎯📢我们团队发布了[VideoRAG](https://github.com/HKUDS/VideoRAG),用于理解超长上下文视频。
|
||||
- [X] [2025.01.13]🎯📢我们团队发布了[MiniRAG](https://github.com/HKUDS/MiniRAG),使用小型模型简化RAG。
|
||||
|
|
@ -142,6 +143,8 @@ LightRAG对大型语言模型(LLM)的能力要求远高于传统RAG,因为
|
|||
- **LLM选型**:
|
||||
- 推荐选用参数量至少为32B的LLM。
|
||||
- 上下文长度至少为32KB,推荐达到64KB。
|
||||
- 在文档索引阶段不建议选择推理模型。
|
||||
- 在查询阶段建议选择比索引阶段能力更强的模型,以达到更高的查询效果。
|
||||
- **Embedding模型**:
|
||||
- 高性能的Embedding模型对RAG至关重要。
|
||||
- 推荐使用主流的多语言Embedding模型,例如:BAAI/bge-m3 和 text-embedding-3-large。
|
||||
|
|
@ -178,7 +181,7 @@ python examples/lightrag_openai_demo.py
|
|||
|
||||
## 使用LightRAG Core进行编程
|
||||
|
||||
> 如果您希望将LightRAG集成到您的项目中,建议您使用LightRAG Server提供的REST API。LightRAG Core通常用于嵌入式应用,或供希望进行研究与评估的学者使用。
|
||||
> ⚠️ **如果您希望将LightRAG集成到您的项目中,建议您使用LightRAG Server提供的REST API**。LightRAG Core通常用于嵌入式应用,或供希望进行研究与评估的学者使用。
|
||||
|
||||
### 一个简单程序
|
||||
|
||||
|
|
@ -265,13 +268,14 @@ if __name__ == "__main__":
|
|||
| **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
|
||||
| **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
|
||||
| **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
|
||||
| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `32000`(默认值由环境变量MAX_TOKENS更改) |
|
||||
| **summary_context_size** | `int` | 合并实体关系摘要时送给LLM的最大令牌数 | `10000`(由环境变量 SUMMARY_CONTEXT_SIZE 设置) |
|
||||
| **summary_max_tokens** | `int` | 合并实体关系描述的最大令牌数长度 | `500`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
|
||||
| **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
|
||||
| **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
|
||||
| **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
|
||||
| **enable_llm_cache** | `bool` | 如果为`TRUE`,将LLM结果存储在缓存中;重复的提示返回缓存的响应 | `TRUE` |
|
||||
| **enable_llm_cache_for_entity_extract** | `bool` | 如果为`TRUE`,将实体提取的LLM结果存储在缓存中;适合初学者调试应用程序 | `TRUE` |
|
||||
| **addon_params** | `dict` | 附加参数,例如`{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"]}`:设置示例限制、输出语言和文档处理的批量大小 | `example_number: 所有示例, language: English` |
|
||||
| **addon_params** | `dict` | 附加参数,例如`{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`:设置实体/关系抽取的输出语言和实体类型 | `language: English` |
|
||||
| **embedding_cache_config** | `dict` | 问答缓存的配置。包含三个参数:`enabled`:布尔值,启用/禁用缓存查找功能。启用时,系统将在生成新答案之前检查缓存的响应。`similarity_threshold`:浮点值(0-1),相似度阈值。当新问题与缓存问题的相似度超过此阈值时,将直接返回缓存的答案而不调用LLM。`use_llm_check`:布尔值,启用/禁用LLM相似度验证。启用时,在返回缓存答案之前,将使用LLM作为二次检查来验证问题之间的相似度。 | 默认:`{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
|
||||
|
||||
</details>
|
||||
|
|
@ -591,31 +595,15 @@ if __name__ == "__main__":
|
|||
|
||||
</details>
|
||||
|
||||
### 对话历史
|
||||
### Rerank函数注入
|
||||
|
||||
LightRAG现在通过对话历史功能支持多轮对话。以下是使用方法:
|
||||
为了提高检索质量,可以根据更有效的相关性评分模型对文档进行重排序。`rerank.py`文件提供了三个Reranker提供商的驱动函数:
|
||||
|
||||
```python
|
||||
# 创建对话历史
|
||||
conversation_history = [
|
||||
{"role": "user", "content": "主角对圣诞节的态度是什么?"},
|
||||
{"role": "assistant", "content": "在故事开始时,埃比尼泽·斯克鲁奇对圣诞节持非常消极的态度..."},
|
||||
{"role": "user", "content": "他的态度是如何改变的?"}
|
||||
]
|
||||
* **Cohere / vLLM**: `cohere_rerank`
|
||||
* **Jina AI**: `jina_rerank`
|
||||
* **Aliyun阿里云**: `ali_rerank`
|
||||
|
||||
# 创建带有对话历史的查询参数
|
||||
query_param = QueryParam(
|
||||
mode="mix", # 或其他模式:"local"、"global"、"hybrid"
|
||||
conversation_history=conversation_history, # 添加对话历史
|
||||
history_turns=3 # 考虑最近的对话轮数
|
||||
)
|
||||
|
||||
# 进行考虑对话历史的查询
|
||||
response = rag.query(
|
||||
"是什么导致了他性格的这种变化?",
|
||||
param=query_param
|
||||
)
|
||||
```
|
||||
您可以将这些函数之一注入到LightRAG对象的`rerank_model_func`属性中。这将使LightRAG的查询功能能够使用注入的函数对检索到的文本块进行重新排序。有关详细用法,请参阅`examples/rerank_example.py`文件。
|
||||
|
||||
### 用户提示词 vs. 查询内容
|
||||
|
||||
|
|
@ -787,7 +775,7 @@ MongoDocStatusStorage MongoDB
|
|||
每一种存储类型的链接配置范例可以在 `env.example` 文件中找到。链接字符串中的数据库实例是需要你预先在数据库服务器上创建好的,LightRAG 仅负责在数据库实例中创建数据表,不负责创建数据库实例。如果使用 Redis 作为存储,记得给 Redis 配置自动持久化数据规则,否则 Redis 服务重启后数据会丢失。如果使用PostgreSQL数据库,推荐使用16.6版本或以上。
|
||||
|
||||
<details>
|
||||
<summary> <b>使用Neo4J进行存储</b> </summary>
|
||||
<summary> <b>使用Neo4J存储</b> </summary>
|
||||
|
||||
* 对于生产级场景,您很可能想要利用企业级解决方案
|
||||
* 进行KG存储。推荐在Docker中运行Neo4J以进行无缝本地测试。
|
||||
|
|
@ -825,7 +813,7 @@ async def initialize_rag():
|
|||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>使用Faiss进行存储</b> </summary>
|
||||
<summary> <b>使用Faiss存储</b> </summary>
|
||||
在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
|
||||
|
||||
- 安装所需依赖:
|
||||
|
|
@ -862,18 +850,39 @@ rag = LightRAG(
|
|||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>使用PostgreSQL进行存储</b> </summary>
|
||||
<summary> <b>使用PostgreSQL存储</b> </summary>
|
||||
|
||||
对于生产级场景,您很可能想要利用企业级解决方案。PostgreSQL可以为您提供一站式解决方案,作为KV存储、向量数据库(pgvector)和图数据库(apache AGE)。支持 PostgreSQL 版本为16.6或以上。
|
||||
对于生产级场景,您很可能想要利用企业级解决方案。PostgreSQL可以为您提供一站式存储解决方案,作为KV存储、向量数据库(pgvector)和图数据库(apache AGE)。支持 PostgreSQL 版本为16.6或以上。
|
||||
|
||||
* PostgreSQL很轻量,整个二进制发行版包括所有必要的插件可以压缩到40MB:参考[Windows发布版](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0),它在Linux/Mac上也很容易安装。
|
||||
* 如果您是初学者并想避免麻烦,推荐使用docker,请从这个镜像开始(请务必阅读概述):https://hub.docker.com/r/shangor/postgres-for-rag
|
||||
* 如何开始?参考:[examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
|
||||
|
||||
* Apache AGE的性能不如Neo4j。追求高性能的图数据库请使用Neo4j。
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>使用MongoDB存储</b> </summary>
|
||||
|
||||
MongoDB为LightRAG提供了一站式的存储解决方案。MongoDB提供原生的KV存储和向量存储。LightRAG使用MongoDB的集合实现了一个简易的图存储。MongoDB 官方的向量检索功能(`$vectorSearch`)目前必须依赖其官方的云服务 MongoDB Atlas,无法在自托管的 MongoDB Community/Enterprise 版本上使用。
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>使用Redis存储</b> </summary>
|
||||
|
||||
LightRAG支持使用Redis作为KV存储。使用Redis存储时需要注意持久化配置和内存使用量配置。以下是推荐的Redis配置:
|
||||
|
||||
```
|
||||
save 900 1
|
||||
save 300 10
|
||||
save 60 1000
|
||||
stop-writes-on-bgsave-error yes
|
||||
maxmemory 4gb
|
||||
maxmemory-policy noeviction
|
||||
maxclients 500
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### LightRAG实例间的数据隔离
|
||||
|
||||
通过 workspace 参数可以实现不同LightRAG实例之间的存储数据隔离。LightRAG在初始化后workspace就已经确定,之后修改workspace是无效的。下面是不同类型的存储实现工作空间的方式:
|
||||
README.md (79 lines changed)
|
|
@ -141,6 +141,8 @@ LightRAG's demands on the capabilities of Large Language Models (LLMs) are signi
|
|||
- **LLM Selection**:
|
||||
- It is recommended to use an LLM with at least 32 billion parameters.
|
||||
- The context length should be at least 32KB, with 64KB being recommended.
|
||||
- It is not recommended to choose reasoning models during the document indexing stage.
|
||||
- During the query stage, it is recommended to choose models with stronger capabilities than those used in the indexing stage to achieve better query results.
|
||||
- **Embedding Model**:
|
||||
- A high-performance Embedding model is essential for RAG.
|
||||
- We recommend using mainstream multilingual Embedding models, such as: `BAAI/bge-m3` and `text-embedding-3-large`.
|
||||
|
|
@ -177,11 +179,12 @@ For a streaming response implementation example, please see `examples/lightrag_o
|
|||
|
||||
## Programming with LightRAG Core
|
||||
|
||||
> If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations.
|
||||
> ⚠️ **If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server**. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations.
|
||||
|
||||
### ⚠️ Important: Initialization Requirements
|
||||
|
||||
**LightRAG requires explicit initialization before use.** You must call both `await rag.initialize_storages()` and `await initialize_pipeline_status()` after creating a LightRAG instance, otherwise you will encounter errors like:
|
||||
|
||||
- `AttributeError: __aenter__` - if storages are not initialized
|
||||
- `KeyError: 'history_messages'` - if pipeline status is not initialized
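A minimal sketch of the required call order (the helper names and working directory follow the other examples in this repository and are placeholders for your own setup):

```python
import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status


async def main():
    rag = LightRAG(
        working_dir="./rag_storage",
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
    )
    # Both calls are required before any insert or query,
    # otherwise the errors listed above are raised.
    await rag.initialize_storages()
    await initialize_pipeline_status()

    # ... insert documents and run queries here ...


asyncio.run(main())
```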
|
||||
|
||||
|
|
@ -272,13 +275,14 @@ A full list of LightRAG init parameters:
|
|||
| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
|
||||
| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
|
||||
| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
|
||||
| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM to generate entity relation summaries | `32000` (default value changed by env var MAX_TOKENS) |
| **summary_context_size** | `int` | Maximum tokens sent to the LLM when merging entity relation summaries | `10000` (configured by env var SUMMARY_CONTEXT_SIZE) |
|
||||
| **summary_max_tokens** | `int` | Maximum token size for entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) |
|
||||
| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
|
||||
| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
|
||||
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
|
||||
| **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
|
||||
| **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
|
||||
| **addon_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"]}`: sets example limit and entity/relation extraction output language | `example_number: all examples, language: English` |
| **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets entity/relation extraction output language and entity types | `language: English` |
|
||||
| **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
|
||||
|
||||
</details>
|
||||
|
|
@ -594,36 +598,15 @@ if __name__ == "__main__":
|
|||
|
||||
</details>
|
||||
|
||||
### Conversation History Support
|
||||
### Rerank Function Injection
|
||||
|
||||
To enhance retrieval quality, retrieved documents can be re-ranked with a more effective relevance scoring model. The `rerank.py` module provides driver functions for three reranker providers:
|
||||
|
||||
LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:
|
||||
* **Cohere / vLLM**: `cohere_rerank`
|
||||
* **Jina AI**: `jina_rerank`
|
||||
* **Aliyun**: `ali_rerank`
|
||||
|
||||
<details>
|
||||
<summary> <b> Usage Example </b></summary>
|
||||
|
||||
```python
|
||||
# Create conversation history
|
||||
conversation_history = [
|
||||
{"role": "user", "content": "What is the main character's attitude towards Christmas?"},
|
||||
{"role": "assistant", "content": "At the beginning of the story, Ebenezer Scrooge has a very negative attitude towards Christmas..."},
|
||||
{"role": "user", "content": "How does his attitude change?"}
|
||||
]
|
||||
|
||||
# Create query parameters with conversation history
|
||||
query_param = QueryParam(
|
||||
mode="mix", # or any other mode: "local", "global", "hybrid"
|
||||
conversation_history=conversation_history, # Add the conversation history
|
||||
)
|
||||
|
||||
# Make a query that takes into account the conversation history
|
||||
response = rag.query(
|
||||
"What causes this change in his character?",
|
||||
param=query_param
|
||||
)
|
||||
```
|
||||
|
||||
</details>
|
||||
You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object. This will enable LightRAG's query function to re-order retrieved text blocks using the injected function. For detailed usage, please refer to the `examples/rerank_example.py` file.
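A minimal sketch of such an injection, following the pattern used in `examples/rerank_example.py` (the environment variable names match `env.example`; the LLM and embedding functions are placeholders for the ones you already use):

```python
import os
from functools import partial

from lightrag import LightRAG
from lightrag.rerank import cohere_rerank

# Bind the provider settings once; LightRAG calls this function at query
# time to re-order the retrieved text chunks.
rerank_model_func = partial(
    cohere_rerank,
    model=os.getenv("RERANK_MODEL"),
    api_key=os.getenv("RERANK_BINDING_API_KEY"),
    base_url=os.getenv("RERANK_BINDING_HOST"),
)

rag = LightRAG(
    working_dir="./rag_storage",
    llm_model_func=your_llm_func,        # your existing LLM function
    embedding_func=your_embedding_func,  # your existing embedding function
    rerank_model_func=rerank_model_func,
)
```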
|
||||
|
||||
### User Prompt vs. Query
|
||||
|
||||
|
|
@ -644,8 +627,6 @@ response_default = rag.query(
|
|||
print(response_default)
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Insert
|
||||
|
||||
<details>
|
||||
|
|
@ -798,7 +779,7 @@ MongoDocStatusStorage MongoDB
|
|||
Example connection configurations for each storage type can be found in the `env.example` file. The database instance in the connection string needs to be created by you on the database server beforehand. LightRAG is only responsible for creating tables within the database instance, not for creating the database instance itself. If using Redis as storage, remember to configure automatic data persistence rules for Redis, otherwise data will be lost after the Redis service restarts. If using PostgreSQL, it is recommended to use version 16.6 or above.
|
||||
|
||||
<details>
|
||||
<summary> <b>Using Neo4J for Storage</b> </summary>
|
||||
<summary> <b>Using Neo4J Storage</b> </summary>
|
||||
|
||||
* For production level scenarios you will most likely want to leverage an enterprise solution
|
||||
* for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
|
||||
|
|
@ -837,7 +818,7 @@ see test_neo4j.py for a working example.
|
|||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>Using PostgreSQL for Storage</b> </summary>
|
||||
<summary> <b>Using PostgreSQL Storage</b> </summary>
|
||||
|
||||
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE). PostgreSQL version 16.6 or higher is supported.
|
||||
|
||||
|
|
@ -849,7 +830,7 @@ For production level scenarios you will most likely want to leverage an enterpri
|
|||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>Using Faiss for Storage</b> </summary>
|
||||
<summary> <b>Using Faiss Storage</b> </summary>
|
||||
Before using Faiss vector database, you must manually install `faiss-cpu` or `faiss-gpu`.
|
||||
|
||||
- Install the required dependencies:
|
||||
|
|
@ -920,6 +901,30 @@ async def initialize_rag():
|
|||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>Using MongoDB Storage</b> </summary>
|
||||
|
||||
MongoDB provides a one-stop storage solution for LightRAG. MongoDB offers native KV storage and vector storage. LightRAG uses MongoDB collections to implement a simple graph storage. MongoDB's official vector search functionality (`$vectorSearch`) currently requires their official cloud service MongoDB Atlas. This functionality cannot be used on self-hosted MongoDB Community/Enterprise versions.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary> <b>Using Redis Storage</b> </summary>
|
||||
|
||||
LightRAG supports using Redis as KV storage. When using Redis storage, attention should be paid to persistence configuration and memory usage configuration. The following is the recommended Redis configuration:
|
||||
|
||||
```
|
||||
save 900 1
|
||||
save 300 10
|
||||
save 60 1000
|
||||
stop-writes-on-bgsave-error yes
|
||||
maxmemory 4gb
|
||||
maxmemory-policy noeviction
|
||||
maxclients 500
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Data Isolation Between LightRAG Instances
|
||||
|
||||
The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage:
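Before the per-storage details, a minimal usage sketch, assuming only that `workspace` is passed as a constructor argument (the other arguments follow the earlier examples):

```python
from lightrag import LightRAG

# Two logically separate knowledge bases sharing the same storage configuration.
rag_team_a = LightRAG(
    working_dir="./rag_storage",
    workspace="team_a",          # data is written under the "team_a" workspace
    llm_model_func=your_llm_func,
    embedding_func=your_embedding_func,
)

rag_team_b = LightRAG(
    working_dir="./rag_storage",
    workspace="team_b",          # isolated from team_a's data
    llm_model_func=your_llm_func,
    embedding_func=your_embedding_func,
)
# Note: the workspace is fixed at initialization and cannot be changed afterwards.
```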
|
||||
|
|
@ -1287,8 +1292,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize storage (this will load existing data if available)
|
||||
await lightrag_instance.initialize_storages()
|
||||
|
||||
# Now initialize RAGAnything with the existing LightRAG instance
|
||||
rag = RAGAnything(
|
||||
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
||||
|
|
@ -1317,12 +1324,14 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|||
)
|
||||
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
||||
)
|
||||
|
||||
# Query the existing knowledge base
|
||||
result = await rag.query_with_multimodal(
|
||||
"What data has been processed in this LightRAG instance?",
|
||||
mode="hybrid"
|
||||
)
|
||||
print("Query result:", result)
|
||||
|
||||
# Add new multimodal documents to the existing LightRAG instance
|
||||
await rag.process_document_complete(
|
||||
file_path="path/to/new/multimodal_document.pdf",
|
||||
|
|
|
|||
|
|
@ -1,281 +0,0 @@
|
|||
# Rerank Integration Guide
|
||||
|
||||
LightRAG supports reranking functionality to improve retrieval quality by re-ordering documents based on their relevance to the query. Reranking is now controlled per query via the `enable_rerank` parameter (default: True).
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Set these variables in your `.env` file or environment for rerank model configuration:
|
||||
|
||||
```bash
|
||||
# Rerank model configuration (required when enable_rerank=True in queries)
|
||||
RERANK_MODEL=BAAI/bge-reranker-v2-m3
|
||||
RERANK_BINDING_HOST=https://api.your-provider.com/v1/rerank
|
||||
RERANK_BINDING_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
### Programmatic Configuration
|
||||
|
||||
```python
|
||||
from lightrag import LightRAG, QueryParam
|
||||
from lightrag.rerank import custom_rerank, RerankModel
|
||||
|
||||
# Method 1: Using a custom rerank function with all settings included
|
||||
async def my_rerank_func(query: str, documents: list, top_n: int = None, **kwargs):
|
||||
return await custom_rerank(
|
||||
query=query,
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
base_url="https://api.your-provider.com/v1/rerank",
|
||||
api_key="your_api_key_here",
|
||||
top_n=top_n or 10, # Handle top_n within the function
|
||||
**kwargs
|
||||
)
|
||||
|
||||
rag = LightRAG(
|
||||
working_dir="./rag_storage",
|
||||
llm_model_func=your_llm_func,
|
||||
embedding_func=your_embedding_func,
|
||||
rerank_model_func=my_rerank_func, # Configure rerank function
|
||||
)
|
||||
|
||||
# Query with rerank enabled (default)
|
||||
result = await rag.aquery(
|
||||
"your query",
|
||||
param=QueryParam(enable_rerank=True) # Control rerank per query
|
||||
)
|
||||
|
||||
# Query with rerank disabled
|
||||
result = await rag.aquery(
|
||||
"your query",
|
||||
param=QueryParam(enable_rerank=False)
|
||||
)
|
||||
|
||||
# Method 2: Using RerankModel wrapper
|
||||
rerank_model = RerankModel(
|
||||
rerank_func=custom_rerank,
|
||||
kwargs={
|
||||
"model": "BAAI/bge-reranker-v2-m3",
|
||||
"base_url": "https://api.your-provider.com/v1/rerank",
|
||||
"api_key": "your_api_key_here",
|
||||
}
|
||||
)
|
||||
|
||||
rag = LightRAG(
|
||||
working_dir="./rag_storage",
|
||||
llm_model_func=your_llm_func,
|
||||
embedding_func=your_embedding_func,
|
||||
rerank_model_func=rerank_model.rerank,
|
||||
)
|
||||
|
||||
# Control rerank per query
|
||||
result = await rag.aquery(
|
||||
"your query",
|
||||
param=QueryParam(
|
||||
enable_rerank=True, # Enable rerank for this query
|
||||
chunk_top_k=5 # Number of chunks to keep after reranking
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
## Supported Providers
|
||||
|
||||
### 1. Custom/Generic API (Recommended)
|
||||
|
||||
For Jina/Cohere compatible APIs:
|
||||
|
||||
```python
|
||||
from lightrag.rerank import custom_rerank
|
||||
|
||||
# Your custom API endpoint
|
||||
result = await custom_rerank(
|
||||
query="your query",
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
base_url="https://api.your-provider.com/v1/rerank",
|
||||
api_key="your_api_key_here",
|
||||
top_n=10
|
||||
)
|
||||
```
|
||||
|
||||
### 2. Jina AI
|
||||
|
||||
```python
|
||||
from lightrag.rerank import jina_rerank
|
||||
|
||||
result = await jina_rerank(
|
||||
query="your query",
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
api_key="your_jina_api_key",
|
||||
top_n=10
|
||||
)
|
||||
```
|
||||
|
||||
### 3. Cohere
|
||||
|
||||
```python
|
||||
from lightrag.rerank import cohere_rerank
|
||||
|
||||
result = await cohere_rerank(
|
||||
query="your query",
|
||||
documents=documents,
|
||||
model="rerank-english-v2.0",
|
||||
api_key="your_cohere_api_key",
|
||||
top_n=10
|
||||
)
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
Reranking is automatically applied at these key retrieval stages:
|
||||
|
||||
1. **Naive Mode**: After vector similarity search in `_get_vector_context`
|
||||
2. **Local Mode**: After entity retrieval in `_get_node_data`
|
||||
3. **Global Mode**: After relationship retrieval in `_get_edge_data`
|
||||
4. **Hybrid/Mix Modes**: Applied to all relevant components
|
||||
|
||||
## Configuration Parameters
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `enable_rerank` | bool | True | Enable/disable reranking per query |
|
||||
| `rerank_model_func` | callable | None | Custom rerank function containing all configurations (model, API keys, top_n, etc.) |
|
||||
|
||||
## Example Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from lightrag import LightRAG, QueryParam
|
||||
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embedding
|
||||
from lightrag.kg.shared_storage import initialize_pipeline_status
|
||||
from lightrag.rerank import jina_rerank
|
||||
|
||||
async def my_rerank_func(query: str, documents: list, top_n: int = None, **kwargs):
|
||||
"""Custom rerank function with all settings included"""
|
||||
return await jina_rerank(
|
||||
query=query,
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
api_key="your_jina_api_key_here",
|
||||
top_n=top_n or 10, # Default top_n if not provided
|
||||
**kwargs
|
||||
)
|
||||
|
||||
async def main():
|
||||
# Initialize with rerank enabled
|
||||
rag = LightRAG(
|
||||
working_dir="./rag_storage",
|
||||
llm_model_func=gpt_4o_mini_complete,
|
||||
embedding_func=openai_embedding,
|
||||
rerank_model_func=my_rerank_func,
|
||||
)
|
||||
|
||||
await rag.initialize_storages()
|
||||
await initialize_pipeline_status()
|
||||
|
||||
# Insert documents
|
||||
await rag.ainsert([
|
||||
"Document 1 content...",
|
||||
"Document 2 content...",
|
||||
])
|
||||
|
||||
# Query with rerank (automatically applied)
|
||||
result = await rag.aquery(
|
||||
"Your question here",
|
||||
param=QueryParam(enable_rerank=True)  # Rerank is applied using the configured rerank function
|
||||
)
|
||||
|
||||
print(result)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
### Direct Rerank Usage
|
||||
|
||||
```python
|
||||
from lightrag.rerank import custom_rerank
|
||||
|
||||
async def test_rerank():
|
||||
documents = [
|
||||
{"content": "Text about topic A"},
|
||||
{"content": "Text about topic B"},
|
||||
{"content": "Text about topic C"},
|
||||
]
|
||||
|
||||
reranked = await custom_rerank(
|
||||
query="Tell me about topic A",
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
base_url="https://api.your-provider.com/v1/rerank",
|
||||
api_key="your_api_key_here",
|
||||
top_n=2
|
||||
)
|
||||
|
||||
for doc in reranked:
|
||||
print(f"Score: {doc.get('rerank_score')}, Content: {doc.get('content')}")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Self-Contained Functions**: Include all necessary configurations (API keys, models, top_n handling) within your rerank function
|
||||
2. **Performance**: Use reranking selectively for better performance vs. quality tradeoff
|
||||
3. **API Limits**: Monitor API usage and implement rate limiting within your rerank function
|
||||
4. **Fallback**: Always handle rerank failures gracefully (returns original results)
|
||||
5. **Top-n Handling**: Handle top_n parameter appropriately within your rerank function
|
||||
6. **Cost Management**: Consider rerank API costs in your budget planning
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **API Key Missing**: Ensure API keys are properly configured within your rerank function
|
||||
2. **Network Issues**: Check API endpoints and network connectivity
|
||||
3. **Model Errors**: Verify the rerank model name is supported by your API
|
||||
4. **Document Format**: Ensure documents have `content` or `text` fields
|
||||
|
||||
### Debug Mode
|
||||
|
||||
Enable debug logging to see rerank operations:
|
||||
|
||||
```python
|
||||
import logging
|
||||
logging.getLogger("lightrag.rerank").setLevel(logging.DEBUG)
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
|
||||
The rerank integration includes automatic fallback:
|
||||
|
||||
```python
|
||||
# If rerank fails, original documents are returned
|
||||
# No exceptions are raised to the user
|
||||
# Errors are logged for debugging
|
||||
```
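If you want the same behavior inside your own rerank function (for example, when calling a provider directly), a minimal sketch of the fallback pattern could look like this (arguments to `custom_rerank` as in the examples above):

```python
from lightrag.rerank import custom_rerank


async def safe_rerank(query: str, documents: list, top_n: int = None, **kwargs):
    """Return reranked documents, or the original list if the provider call fails."""
    try:
        return await custom_rerank(
            query=query,
            documents=documents,
            model="BAAI/bge-reranker-v2-m3",
            base_url="https://api.your-provider.com/v1/rerank",
            api_key="your_api_key_here",
            top_n=top_n or 10,
            **kwargs,
        )
    except Exception as exc:
        # Log and fall back to the un-reranked documents.
        print(f"Rerank failed, returning original order: {exc}")
        return documents
```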
|
||||
|
||||
## API Compatibility
|
||||
|
||||
The generic rerank API expects this response format:
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"index": 0,
|
||||
"relevance_score": 0.95
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"relevance_score": 0.87
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This is compatible with:
|
||||
- Jina AI Rerank API
|
||||
- Cohere Rerank API
|
||||
- Custom APIs following the same format
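As an illustration of how such a response maps back onto the input documents, here is a small sketch (a purely hypothetical helper; it assumes the parsed JSON shown above and dictionary-style documents with a `content` field):

```python
def apply_rerank_response(documents: list, response: dict, top_n: int = None) -> list:
    """Re-order documents using the (index, relevance_score) pairs of the response."""
    reranked = []
    for item in response["results"]:
        doc = dict(documents[item["index"]])           # copy the original document
        doc["rerank_score"] = item["relevance_score"]  # attach the provider's score
        reranked.append(doc)
    return reranked[:top_n] if top_n else reranked


# Example with the response shown above:
docs = [{"content": "Text A"}, {"content": "Text B"}, {"content": "Text C"}]
resp = {"results": [{"index": 0, "relevance_score": 0.95},
                    {"index": 2, "relevance_score": 0.87}]}
print(apply_rerank_response(docs, resp))  # documents 0 and 2, highest score first
```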
env.example (95 lines changed)
|
|
@ -8,6 +8,8 @@ PORT=9621
|
|||
WEBUI_TITLE='My Graph KB'
|
||||
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
|
||||
# WORKERS=2
|
||||
### Gunicorn worker timeout (used as the default LLM request timeout if LLM_TIMEOUT is not set)
|
||||
# TIMEOUT=150
|
||||
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
||||
|
||||
### Optional SSL Configuration
|
||||
|
|
@ -83,31 +85,60 @@ ENABLE_LLM_CACHE=true
|
|||
### If reranking is enabled, the impact of chunk selection strategies will be diminished.
|
||||
# KG_CHUNK_PICK_METHOD=VECTOR
|
||||
|
||||
#########################################################
|
||||
### Reranking configuration
|
||||
### Reranker: set ENABLE_RERANK to true when a reranking model is configured
# ENABLE_RERANK=True
### Minimum rerank score for document chunk exclusion (set to 0.0 to keep all chunks, 0.6 or above if the LLM is not strong enough)
|
||||
### RERANK_BINDING type: null, cohere, jina, aliyun
|
||||
### For rerank model deployed by vLLM use cohere binding
|
||||
#########################################################
|
||||
RERANK_BINDING=null
|
||||
### Enable rerank by default in query params when RERANK_BINDING is not null
|
||||
# RERANK_BY_DEFAULT=True
|
||||
### Rerank score chunk filter (set to 0.0 to keep all chunks, 0.6 or above if the LLM is not strong enough)
|
||||
# MIN_RERANK_SCORE=0.0
|
||||
### Rerank model configuration (required when ENABLE_RERANK=True)
|
||||
# RERANK_MODEL=jina-reranker-v2-base-multilingual
|
||||
|
||||
### For local deployment with vLLM
|
||||
# RERANK_MODEL=BAAI/bge-reranker-v2-m3
|
||||
# RERANK_BINDING_HOST=http://localhost:8000/v1/rerank
|
||||
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
|
||||
|
||||
### Default value for Cohere AI
|
||||
# RERANK_MODEL=rerank-v3.5
|
||||
# RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
|
||||
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
|
||||
|
||||
### Default value for Jina AI
|
||||
# RERANK_MODEL=jina-reranker-v2-base-multilingual
|
||||
# RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
|
||||
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
|
||||
|
||||
### Default value for Aliyun
|
||||
# RERANK_MODEL=gte-rerank-v2
|
||||
# RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
|
||||
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
|
||||
|
||||
########################################
|
||||
### Document processing configuration
|
||||
########################################
|
||||
### Language: English, Chinese, French, German ...
|
||||
SUMMARY_LANGUAGE=English
|
||||
ENABLE_LLM_CACHE_FOR_EXTRACT=true
|
||||
|
||||
### Document processing output language: English, Chinese, French, German ...
|
||||
SUMMARY_LANGUAGE=English
|
||||
|
||||
### Entity types that the LLM will attempt to recognize
|
||||
# ENTITY_TYPES='["Organization", "Person", "Location", "Event", "Technology", "Equipment", "Product", "Document", "Category"]'
|
||||
|
||||
### Chunk size for document splitting, 500~1500 is recommended
|
||||
# CHUNK_SIZE=1200
|
||||
# CHUNK_OVERLAP_SIZE=100
|
||||
### Entity and relation summarization configuration
|
||||
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to the LLM
|
||||
# FORCE_LLM_SUMMARY_ON_MERGE=4
|
||||
# MAX_TOKENS=10000
|
||||
### Maximum number of entity extraction attempts for ambiguous content
|
||||
# MAX_GLEANING=1
|
||||
|
||||
### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
|
||||
# FORCE_LLM_SUMMARY_ON_MERGE=8
|
||||
### Max description token size to trigger LLM summary
|
||||
# SUMMARY_MAX_TOKENS = 1200
|
||||
### Recommended LLM summary output length in tokens
|
||||
# SUMMARY_LENGTH_RECOMMENDED=600
|
||||
### Maximum context size sent to LLM for description summary
|
||||
# SUMMARY_CONTEXT_SIZE=12000
|
||||
|
||||
###############################
|
||||
### Concurrency Configuration
|
||||
|
|
@ -125,9 +156,8 @@ MAX_PARALLEL_INSERT=2
|
|||
### LLM Configuration
|
||||
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
|
||||
###########################################################
|
||||
### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
|
||||
# TEMPERATURE=1.0
|
||||
### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops with a low temperature
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
|
||||
# LLM_TIMEOUT=180
|
||||
|
||||
LLM_BINDING=openai
|
||||
LLM_MODEL=gpt-4o
|
||||
|
|
@ -145,29 +175,34 @@ LLM_BINDING_API_KEY=your_api_key
|
|||
# LLM_BINDING=openai
|
||||
|
||||
### OpenAI Specific Parameters
|
||||
### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
|
||||
# OPENAI_LLM_FREQUENCY_PENALTY=1.1
|
||||
### Use the following command to see all supported options for openai and azure_openai
### To mitigate endless output, set the temperature to a higher value
|
||||
# OPENAI_LLM_TEMPERATURE=0.8
|
||||
|
||||
### OpenRouter Specific Parameters
|
||||
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
|
||||
### Qwen3-specific parameters when deployed by vLLM
|
||||
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
|
||||
|
||||
### Use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
|
||||
### lightrag-server --llm-binding openai --help
|
||||
|
||||
### Ollama Server Specific Parameters
|
||||
### Time out in seconds, None for infinite timeout
|
||||
TIMEOUT=240
|
||||
### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
|
||||
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
|
||||
OLLAMA_LLM_NUM_CTX=32768
|
||||
# OLLAMA_LLM_TEMPERATURE=1.0
|
||||
### Stop sequences for Ollama LLM
|
||||
# OLLAMA_LLM_STOP='["</s>", "Assistant:", "\n\n"]'
|
||||
### If OLLAMA_LLM_TEMPERATURE is not specified, the system will default to the value defined by TEMPERATURE
|
||||
# OLLAMA_LLM_TEMPERATURE=0.85
|
||||
# OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'
|
||||
### Use the following command to see all supported options for Ollama LLM
|
||||
### lightrag-server --llm-binding ollama --help
|
||||
|
||||
### Bedrock Specific Parameters
|
||||
# BEDROCK_LLM_TEMPERATURE=1.0
|
||||
|
||||
####################################################################################
|
||||
### Embedding Configuration (Should not be changed after the first file processed)
|
||||
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
|
||||
####################################################################################
|
||||
### Embedding Binding type: ollama, openai, azure_openai, jina, lollms
|
||||
|
||||
### see also env.ollama-binding-options.example for fine tuning ollama
|
||||
# EMBEDDING_TIMEOUT=30
|
||||
EMBEDDING_BINDING=ollama
|
||||
EMBEDDING_MODEL=bge-m3:latest
|
||||
EMBEDDING_DIM=1024
|
||||
|
|
@ -179,7 +214,7 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
|
|||
# EMBEDDING_BINDING=openai
|
||||
# EMBEDDING_MODEL=text-embedding-3-large
|
||||
# EMBEDDING_DIM=3072
|
||||
# EMBEDDING_BINDING_HOST=https://api.openai.com
|
||||
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
|
||||
# EMBEDDING_BINDING_API_KEY=your_api_key
|
||||
|
||||
### Optional for Azure
|
||||
|
|
@ -268,7 +303,7 @@ POSTGRES_IVFFLAT_LISTS=100
|
|||
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
|
||||
NEO4J_USERNAME=neo4j
|
||||
NEO4J_PASSWORD='your_password'
|
||||
# NEO4J_DATABASE=chunk-entity-relation
|
||||
NEO4J_DATABASE=neo4j
|
||||
NEO4J_MAX_CONNECTION_POOL_SIZE=100
|
||||
NEO4J_CONNECTION_TIMEOUT=30
|
||||
NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
|
||||
|
|
|
|||
|
|
@ -5,15 +5,21 @@ This example demonstrates how to use rerank functionality with LightRAG
|
|||
to improve retrieval quality across different query modes.
|
||||
|
||||
Configuration Required:
|
||||
1. Set your LLM API key and base URL in llm_model_func()
|
||||
2. Set your embedding API key and base URL in embedding_func()
|
||||
3. Set your rerank API key and base URL in the rerank configuration
|
||||
4. Or use environment variables (.env file):
|
||||
- RERANK_MODEL=your_rerank_model
|
||||
- RERANK_BINDING_HOST=your_rerank_endpoint
|
||||
- RERANK_BINDING_API_KEY=your_rerank_api_key
|
||||
1. Set your OpenAI LLM API key and base URL with env vars
|
||||
LLM_MODEL
|
||||
LLM_BINDING_HOST
|
||||
LLM_BINDING_API_KEY
|
||||
2. Set your OpenAI embedding API key and base URL with env vars:
|
||||
EMBEDDING_MODEL
|
||||
EMBEDDING_DIM
|
||||
EMBEDDING_BINDING_HOST
|
||||
EMBEDDING_BINDING_API_KEY
|
||||
3. Set your vLLM-deployed rerank model settings with env vars:
|
||||
RERANK_MODEL
|
||||
RERANK_BINDING_HOST
|
||||
RERANK_BINDING_API_KEY
|
||||
|
||||
Note: Rerank is now controlled per query via the 'enable_rerank' parameter (default: True)
|
||||
Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -21,11 +27,13 @@ import os
|
|||
import numpy as np
|
||||
|
||||
from lightrag import LightRAG, QueryParam
|
||||
from lightrag.rerank import custom_rerank, RerankModel
|
||||
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
||||
from lightrag.utils import EmbeddingFunc, setup_logger
|
||||
from lightrag.kg.shared_storage import initialize_pipeline_status
|
||||
|
||||
from functools import partial
|
||||
from lightrag.rerank import cohere_rerank
|
||||
|
||||
# Set up your working directory
|
||||
WORKING_DIR = "./test_rerank"
|
||||
setup_logger("test_rerank")
|
||||
|
|
@ -38,12 +46,12 @@ async def llm_model_func(
|
|||
prompt, system_prompt=None, history_messages=[], **kwargs
|
||||
) -> str:
|
||||
return await openai_complete_if_cache(
|
||||
"gpt-4o-mini",
|
||||
os.getenv("LLM_MODEL"),
|
||||
prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
api_key="your_llm_api_key_here",
|
||||
base_url="https://api.your-llm-provider.com/v1",
|
||||
api_key=os.getenv("LLM_BINDING_API_KEY"),
|
||||
base_url=os.getenv("LLM_BINDING_HOST"),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
|
@ -51,23 +59,18 @@ async def llm_model_func(
|
|||
async def embedding_func(texts: list[str]) -> np.ndarray:
|
||||
return await openai_embed(
|
||||
texts,
|
||||
model="text-embedding-3-large",
|
||||
api_key="your_embedding_api_key_here",
|
||||
base_url="https://api.your-embedding-provider.com/v1",
|
||||
model=os.getenv("EMBEDDING_MODEL"),
|
||||
api_key=os.getenv("EMBEDDING_BINDING_API_KEY"),
|
||||
base_url=os.getenv("EMBEDDING_BINDING_HOST"),
|
||||
)
|
||||
|
||||
|
||||
async def my_rerank_func(query: str, documents: list, top_n: int = None, **kwargs):
|
||||
"""Custom rerank function with all settings included"""
|
||||
return await custom_rerank(
|
||||
query=query,
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
base_url="https://api.your-rerank-provider.com/v1/rerank",
|
||||
api_key="your_rerank_api_key_here",
|
||||
top_n=top_n or 10,
|
||||
**kwargs,
|
||||
)
|
||||
rerank_model_func = partial(
|
||||
cohere_rerank,
|
||||
model=os.getenv("RERANK_MODEL"),
|
||||
api_key=os.getenv("RERANK_BINDING_API_KEY"),
|
||||
base_url=os.getenv("RERANK_BINDING_HOST"),
|
||||
)
|
||||
|
||||
|
||||
async def create_rag_with_rerank():
|
||||
|
|
@ -88,42 +91,7 @@ async def create_rag_with_rerank():
|
|||
func=embedding_func,
|
||||
),
|
||||
# Rerank Configuration - provide the rerank function
|
||||
rerank_model_func=my_rerank_func,
|
||||
)
|
||||
|
||||
await rag.initialize_storages()
|
||||
await initialize_pipeline_status()
|
||||
|
||||
return rag
|
||||
|
||||
|
||||
async def create_rag_with_rerank_model():
|
||||
"""Alternative: Create LightRAG instance using RerankModel wrapper"""
|
||||
|
||||
# Get embedding dimension
|
||||
test_embedding = await embedding_func(["test"])
|
||||
embedding_dim = test_embedding.shape[1]
|
||||
print(f"Detected embedding dimension: {embedding_dim}")
|
||||
|
||||
# Method 2: Using RerankModel wrapper
|
||||
rerank_model = RerankModel(
|
||||
rerank_func=custom_rerank,
|
||||
kwargs={
|
||||
"model": "BAAI/bge-reranker-v2-m3",
|
||||
"base_url": "https://api.your-rerank-provider.com/v1/rerank",
|
||||
"api_key": "your_rerank_api_key_here",
|
||||
},
|
||||
)
|
||||
|
||||
rag = LightRAG(
|
||||
working_dir=WORKING_DIR,
|
||||
llm_model_func=llm_model_func,
|
||||
embedding_func=EmbeddingFunc(
|
||||
embedding_dim=embedding_dim,
|
||||
max_token_size=8192,
|
||||
func=embedding_func,
|
||||
),
|
||||
rerank_model_func=rerank_model.rerank,
|
||||
rerank_model_func=rerank_model_func,
|
||||
)
|
||||
|
||||
await rag.initialize_storages()
|
||||
|
|
@ -136,7 +104,7 @@ async def test_rerank_with_different_settings():
|
|||
"""
|
||||
Test rerank functionality with different enable_rerank settings
|
||||
"""
|
||||
print("🚀 Setting up LightRAG with Rerank functionality...")
|
||||
print("\n\n🚀 Setting up LightRAG with Rerank functionality...")
|
||||
|
||||
rag = await create_rag_with_rerank()
|
||||
|
||||
|
|
@ -199,11 +167,11 @@ async def test_direct_rerank():
|
|||
print("=" * 40)
|
||||
|
||||
documents = [
|
||||
{"content": "Reranking significantly improves retrieval quality"},
|
||||
{"content": "LightRAG supports advanced reranking capabilities"},
|
||||
{"content": "Vector search finds semantically similar documents"},
|
||||
{"content": "Natural language processing with modern transformers"},
|
||||
{"content": "The quick brown fox jumps over the lazy dog"},
|
||||
"Vector search finds semantically similar documents",
|
||||
"LightRAG supports advanced reranking capabilities",
|
||||
"Reranking significantly improves retrieval quality",
|
||||
"Natural language processing with modern transformers",
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
]
|
||||
|
||||
query = "rerank improve quality"
|
||||
|
|
@ -211,20 +179,20 @@ async def test_direct_rerank():
|
|||
print(f"Documents: {len(documents)}")
|
||||
|
||||
try:
|
||||
reranked_docs = await custom_rerank(
|
||||
reranked_results = await rerank_model_func(
|
||||
query=query,
|
||||
documents=documents,
|
||||
model="BAAI/bge-reranker-v2-m3",
|
||||
base_url="https://api.your-rerank-provider.com/v1/rerank",
|
||||
api_key="your_rerank_api_key_here",
|
||||
top_n=3,
|
||||
top_n=4,
|
||||
)
|
||||
|
||||
print("\n✅ Rerank Results:")
|
||||
for i, doc in enumerate(reranked_docs):
|
||||
score = doc.get("rerank_score", "N/A")
|
||||
content = doc.get("content", "")[:60]
|
||||
print(f" {i+1}. Score: {score:.4f} | {content}...")
|
||||
for result in reranked_results:
    index = result["index"]
    score = result["relevance_score"]
    content = documents[index]
    print(f"  {index}. Score: {score:.4f} | {content}...")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Rerank failed: {e}")
|
||||
|
|
@ -236,12 +204,12 @@ async def main():
|
|||
print("=" * 60)
|
||||
|
||||
try:
|
||||
# Test rerank with different enable_rerank settings
|
||||
await test_rerank_with_different_settings()
|
||||
|
||||
# Test direct rerank
|
||||
await test_direct_rerank()
|
||||
|
||||
# Test rerank with different enable_rerank settings
|
||||
await test_rerank_with_different_settings()
|
||||
|
||||
print("\n✅ Example completed successfully!")
|
||||
print("\n💡 Key Points:")
|
||||
print(" ✓ Rerank is now controlled per query via 'enable_rerank' parameter")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
||||
|
||||
__version__ = "1.4.7"
|
||||
__version__ = "1.4.8"
|
||||
__author__ = "Zirui Guo"
|
||||
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||
|
|
|
|||
|
|
@ -100,11 +100,63 @@ def setup_signal_handlers():
|
|||
signal.signal(signal.SIGTERM, signal_handler) # kill command
|
||||
|
||||
|
||||
class LLMConfigCache:
|
||||
"""Smart LLM and Embedding configuration cache class"""
|
||||
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
|
||||
# Initialize configurations based on binding conditions
|
||||
self.openai_llm_options = None
|
||||
self.ollama_llm_options = None
|
||||
self.ollama_embedding_options = None
|
||||
|
||||
# Only initialize and log OpenAI options when using OpenAI-related bindings
|
||||
if args.llm_binding in ["openai", "azure_openai"]:
|
||||
from lightrag.llm.binding_options import OpenAILLMOptions
|
||||
|
||||
self.openai_llm_options = OpenAILLMOptions.options_dict(args)
|
||||
logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")
|
||||
|
||||
# Only initialize and log Ollama LLM options when using Ollama LLM binding
|
||||
if args.llm_binding == "ollama":
|
||||
try:
|
||||
from lightrag.llm.binding_options import OllamaLLMOptions
|
||||
|
||||
self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
|
||||
logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"OllamaLLMOptions not available, using default configuration"
|
||||
)
|
||||
self.ollama_llm_options = {}
|
||||
|
||||
# Only initialize and log Ollama Embedding options when using Ollama Embedding binding
|
||||
if args.embedding_binding == "ollama":
|
||||
try:
|
||||
from lightrag.llm.binding_options import OllamaEmbeddingOptions
|
||||
|
||||
self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
|
||||
args
|
||||
)
|
||||
logger.info(
|
||||
f"Ollama Embedding Options: {self.ollama_embedding_options}"
|
||||
)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"OllamaEmbeddingOptions not available, using default configuration"
|
||||
)
|
||||
self.ollama_embedding_options = {}
|
||||
|
||||
|
||||
def create_app(args):
|
||||
# Setup logging
|
||||
logger.setLevel(args.log_level)
|
||||
set_verbose_debug(args.verbose)
|
||||
|
||||
# Create configuration cache (this will output configuration logs)
|
||||
config_cache = LLMConfigCache(args)
|
||||
|
||||
# Verify that bindings are correctly setup
|
||||
if args.llm_binding not in [
|
||||
"lollms",
|
||||
|
|
@ -241,10 +293,85 @@ def create_app(args):
|
|||
# Create working directory if it doesn't exist
|
||||
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def create_optimized_openai_llm_func(
|
||||
config_cache: LLMConfigCache, args, llm_timeout: int
|
||||
):
|
||||
"""Create optimized OpenAI LLM function with pre-processed configuration"""
|
||||
|
||||
async def optimized_openai_alike_model_complete(
|
||||
prompt,
|
||||
system_prompt=None,
|
||||
history_messages=None,
|
||||
keyword_extraction=False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
from lightrag.llm.openai import openai_complete_if_cache
|
||||
|
||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
||||
if keyword_extraction:
|
||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
||||
if history_messages is None:
|
||||
history_messages = []
|
||||
|
||||
# Use pre-processed configuration to avoid repeated parsing
|
||||
kwargs["timeout"] = llm_timeout
|
||||
if config_cache.openai_llm_options:
|
||||
kwargs.update(config_cache.openai_llm_options)
|
||||
|
||||
return await openai_complete_if_cache(
|
||||
args.llm_model,
|
||||
prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
base_url=args.llm_binding_host,
|
||||
api_key=args.llm_binding_api_key,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return optimized_openai_alike_model_complete
|
||||
|
||||
def create_optimized_azure_openai_llm_func(
|
||||
config_cache: LLMConfigCache, args, llm_timeout: int
|
||||
):
|
||||
"""Create optimized Azure OpenAI LLM function with pre-processed configuration"""
|
||||
|
||||
async def optimized_azure_openai_model_complete(
|
||||
prompt,
|
||||
system_prompt=None,
|
||||
history_messages=None,
|
||||
keyword_extraction=False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
from lightrag.llm.azure_openai import azure_openai_complete_if_cache
|
||||
|
||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
||||
if keyword_extraction:
|
||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
||||
if history_messages is None:
|
||||
history_messages = []
|
||||
|
||||
# Use pre-processed configuration to avoid repeated parsing
|
||||
kwargs["timeout"] = llm_timeout
|
||||
if config_cache.openai_llm_options:
|
||||
kwargs.update(config_cache.openai_llm_options)
|
||||
|
||||
return await azure_openai_complete_if_cache(
|
||||
args.llm_model,
|
||||
prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
base_url=args.llm_binding_host,
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
|
||||
api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return optimized_azure_openai_model_complete
|
||||
|
||||
def create_llm_model_func(binding: str):
|
||||
"""
|
||||
Create LLM model function based on binding type.
|
||||
Uses lazy import to avoid unnecessary dependencies.
|
||||
Uses optimized functions for OpenAI bindings and lazy import for others.
|
||||
"""
|
||||
try:
|
||||
if binding == "lollms":
|
||||
|
|
@ -258,9 +385,13 @@ def create_app(args):
|
|||
elif binding == "aws_bedrock":
|
||||
return bedrock_model_complete # Already defined locally
|
||||
elif binding == "azure_openai":
|
||||
return azure_openai_model_complete # Already defined locally
|
||||
# Use optimized function with pre-processed configuration
|
||||
return create_optimized_azure_openai_llm_func(
|
||||
config_cache, args, llm_timeout
|
||||
)
|
||||
else: # openai and compatible
|
||||
return openai_alike_model_complete # Already defined locally
|
||||
# Use optimized function with pre-processed configuration
|
||||
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
|
||||
except ImportError as e:
|
||||
raise Exception(f"Failed to import {binding} LLM binding: {e}")
|
||||
|
||||
|
|
@ -283,15 +414,15 @@ def create_app(args):
|
|||
raise Exception(f"Failed to import {binding} options: {e}")
|
||||
return {}
|
||||
|
||||
def create_embedding_function_with_lazy_import(
|
||||
binding, model, host, api_key, dimensions, args
|
||||
def create_optimized_embedding_function(
|
||||
config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
|
||||
):
|
||||
"""
|
||||
Create embedding function with lazy imports for all bindings.
|
||||
Replaces the current create_embedding_function with full lazy import support.
|
||||
Create optimized embedding function with pre-processed configuration for applicable bindings.
|
||||
Uses lazy imports for all bindings and avoids repeated configuration parsing.
|
||||
"""
|
||||
|
||||
async def embedding_function(texts):
|
||||
async def optimized_embedding_function(texts):
|
||||
try:
|
||||
if binding == "lollms":
|
||||
from lightrag.llm.lollms import lollms_embed
|
||||
|
|
@ -300,10 +431,17 @@ def create_app(args):
|
|||
texts, embed_model=model, host=host, api_key=api_key
|
||||
)
|
||||
elif binding == "ollama":
|
||||
from lightrag.llm.binding_options import OllamaEmbeddingOptions
|
||||
from lightrag.llm.ollama import ollama_embed
|
||||
|
||||
ollama_options = OllamaEmbeddingOptions.options_dict(args)
|
||||
# Use pre-processed configuration if available, otherwise fallback to dynamic parsing
|
||||
if config_cache.ollama_embedding_options is not None:
|
||||
ollama_options = config_cache.ollama_embedding_options
|
||||
else:
|
||||
# Fallback for cases where config cache wasn't initialized properly
|
||||
from lightrag.llm.binding_options import OllamaEmbeddingOptions
|
||||
|
||||
ollama_options = OllamaEmbeddingOptions.options_dict(args)
|
||||
|
||||
return await ollama_embed(
|
||||
texts,
|
||||
embed_model=model,
|
||||
|
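The embedding path in the hunk above keeps a "use the cache if it was built, otherwise parse on demand" fallback. A small hedged sketch of that pattern; the options class is a stand-in, not LightRAG's `OllamaEmbeddingOptions`.

```python
# Sketch of the cached-options-with-fallback pattern from
# create_optimized_embedding_function. The options class is an assumption.
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeEmbeddingOptions:
    num_ctx: int = 8192

    @classmethod
    def options_dict(cls, args) -> dict:
        # In the real code this would read values from parsed CLI/env args.
        return {"num_ctx": getattr(args, "embedding_num_ctx", cls.num_ctx)}


def resolve_embedding_options(cached: Optional[dict], args) -> dict:
    if cached is not None:
        # Fast path: configuration was parsed once when the config cache was built.
        return cached
    # Fallback path: cache was not initialized, parse dynamically as before.
    return FakeEmbeddingOptions.options_dict(args)


class _Args:
    embedding_num_ctx = 4096


print(resolve_embedding_options(None, _Args()))              # {'num_ctx': 4096}
print(resolve_embedding_options({"num_ctx": 2048}, _Args())) # {'num_ctx': 2048}
```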
|
@ -334,78 +472,13 @@ def create_app(args):
|
|||
except ImportError as e:
|
||||
raise Exception(f"Failed to import {binding} embedding: {e}")
|
||||
|
||||
return embedding_function
|
||||
return optimized_embedding_function
|
||||
|
||||
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
|
||||
embedding_timeout = get_env_value(
|
||||
"EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
|
||||
)
|
||||
|
||||
async def openai_alike_model_complete(
|
||||
prompt,
|
||||
system_prompt=None,
|
||||
history_messages=None,
|
||||
keyword_extraction=False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
# Lazy import
|
||||
from lightrag.llm.openai import openai_complete_if_cache
|
||||
from lightrag.llm.binding_options import OpenAILLMOptions
|
||||
|
||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
||||
if keyword_extraction:
|
||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
||||
if history_messages is None:
|
||||
history_messages = []
|
||||
|
||||
# Use OpenAI LLM options if available
|
||||
openai_options = OpenAILLMOptions.options_dict(args)
|
||||
kwargs["timeout"] = llm_timeout
|
||||
kwargs.update(openai_options)
|
||||
|
||||
return await openai_complete_if_cache(
|
||||
args.llm_model,
|
||||
prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
base_url=args.llm_binding_host,
|
||||
api_key=args.llm_binding_api_key,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def azure_openai_model_complete(
|
||||
prompt,
|
||||
system_prompt=None,
|
||||
history_messages=None,
|
||||
keyword_extraction=False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
# Lazy import
|
||||
from lightrag.llm.azure_openai import azure_openai_complete_if_cache
|
||||
from lightrag.llm.binding_options import OpenAILLMOptions
|
||||
|
||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
||||
if keyword_extraction:
|
||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
||||
if history_messages is None:
|
||||
history_messages = []
|
||||
|
||||
# Use OpenAI LLM options
|
||||
openai_options = OpenAILLMOptions.options_dict(args)
|
||||
kwargs["timeout"] = llm_timeout
|
||||
kwargs.update(openai_options)
|
||||
|
||||
return await azure_openai_complete_if_cache(
|
||||
args.llm_model,
|
||||
prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
base_url=args.llm_binding_host,
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
||||
api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def bedrock_model_complete(
|
||||
prompt,
|
||||
system_prompt=None,
|
||||
|
|
@ -433,16 +506,17 @@ def create_app(args):
|
|||
**kwargs,
|
||||
)
|
||||
|
||||
# Create embedding function with lazy imports
|
||||
# Create embedding function with optimized configuration
|
||||
embedding_func = EmbeddingFunc(
|
||||
embedding_dim=args.embedding_dim,
|
||||
func=create_embedding_function_with_lazy_import(
|
||||
func=create_optimized_embedding_function(
|
||||
config_cache=config_cache,
|
||||
binding=args.embedding_binding,
|
||||
model=args.embedding_model,
|
||||
host=args.embedding_binding_host,
|
||||
api_key=args.embedding_binding_api_key,
|
||||
dimensions=args.embedding_dim,
|
||||
args=args, # Pass args object for dynamic option generation
|
||||
args=args, # Pass args object for fallback option generation
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -675,6 +749,9 @@ def create_app(args):
|
|||
logger.info(
|
||||
"The system will run in basic mode and only support standard document processing functions"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize LightRAG: {e}")
|
||||
raise
|
||||
|
||||
if not raganything_enabled:
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -2485,7 +2485,8 @@ def create_document_routes(
|
|||
- cur_batch (int): Current processing batch
|
||||
- request_pending (bool): Flag for pending request for processing
|
||||
- latest_message (str): Latest message from pipeline processing
|
||||
- history_messages (List[str], optional): List of history messages
|
||||
- history_messages (List[str], optional): List of history messages (limited to latest 1000 entries,
|
||||
with truncation message if more than 1000 messages exist)
|
||||
|
||||
Raises:
|
||||
HTTPException: If an error occurs while retrieving pipeline status (500)
|
||||
|
|
@ -2520,8 +2521,28 @@ def create_document_routes(
|
|||
status_dict["update_status"] = processed_update_status
|
||||
|
||||
# Convert history_messages to a regular list if it's a Manager.list
|
||||
# and limit to latest 1000 entries with truncation message if needed
|
||||
if "history_messages" in status_dict:
|
||||
status_dict["history_messages"] = list(status_dict["history_messages"])
|
||||
history_list = list(status_dict["history_messages"])
|
||||
total_count = len(history_list)
|
||||
|
||||
if total_count > 1000:
|
||||
# Calculate truncated message count
|
||||
truncated_count = total_count - 1000
|
||||
|
||||
# Take only the latest 1000 messages
|
||||
latest_messages = history_list[-1000:]
|
||||
|
||||
# Add truncation message at the beginning
|
||||
truncation_message = (
|
||||
f"[Truncated history messages: {truncated_count}/{total_count}]"
|
||||
)
|
||||
status_dict["history_messages"] = [
|
||||
truncation_message
|
||||
] + latest_messages
|
||||
else:
|
||||
# No truncation needed, return all messages
|
||||
status_dict["history_messages"] = history_list
|
||||
|
||||
# Ensure job_start is properly formatted as a string with timezone information
|
||||
if "job_start" in status_dict and status_dict["job_start"]:
|
||||
|
|
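The history truncation added above can be read as a small pure function: keep the newest 1000 entries and prepend a marker describing how many were dropped. A hedged standalone version (the 1000-entry limit matches the diff; the helper name is illustrative):

```python
# Standalone sketch of the history_messages truncation in the pipeline_status route.
from typing import List

MAX_HISTORY_MESSAGES = 1000


def truncate_history(messages: List[str], limit: int = MAX_HISTORY_MESSAGES) -> List[str]:
    """Return at most `limit` latest messages, prefixed with a truncation note if needed."""
    total = len(messages)
    if total <= limit:
        return list(messages)
    truncated = total - limit
    note = f"[Truncated history messages: {truncated}/{total}]"
    # Keep only the newest `limit` entries; the note marks what was dropped.
    return [note] + messages[-limit:]


if __name__ == "__main__":
    history = [f"msg {i}" for i in range(1500)]
    out = truncate_history(history)
    print(out[0])    # [Truncated history messages: 500/1500]
    print(len(out))  # 1001 (note + latest 1000)
```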
|
|||
|
|
@ -51,7 +51,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|||
|
||||
os.makedirs(workspace_dir, exist_ok=True)
|
||||
self._graphml_xml_file = os.path.join(
|
||||
working_dir, f"graph_{self.namespace}.graphml"
|
||||
workspace_dir, f"graph_{self.namespace}.graphml"
|
||||
)
|
||||
self._storage_lock = None
|
||||
self.storage_updated = None
|
||||
|
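The NetworkXStorage fix above writes the GraphML file under the per-workspace directory instead of the shared working directory. A small hedged sketch of the path construction; the `working_dir/workspace` layout is an assumption for illustration.

```python
# Sketch of workspace-scoped GraphML paths after the NetworkXStorage fix.
import os


def graphml_path(working_dir: str, workspace: str, namespace: str) -> str:
    workspace_dir = os.path.join(working_dir, workspace)  # assumed layout
    os.makedirs(workspace_dir, exist_ok=True)
    # File now lives under workspace_dir, not directly under working_dir.
    return os.path.join(workspace_dir, f"graph_{namespace}.graphml")


print(graphml_path("./rag_storage", "default", "chunk_entity_relation"))
```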
|
|
|||
|
|
@ -1084,7 +1084,7 @@ class LightRAG:
|
|||
|
||||
tasks = [
|
||||
self.chunks_vdb.upsert(inserting_chunks),
|
||||
self._process_entity_relation_graph(inserting_chunks),
|
||||
self._process_extract_entities(inserting_chunks),
|
||||
self.full_docs.upsert(new_docs),
|
||||
self.text_chunks.upsert(inserting_chunks),
|
||||
]
|
||||
|
|
@ -1699,7 +1699,7 @@ class LightRAG:
|
|||
|
||||
# Stage 2: Process entity relation graph (after text_chunks are saved)
|
||||
entity_relation_task = asyncio.create_task(
|
||||
self._process_entity_relation_graph(
|
||||
self._process_extract_entities(
|
||||
chunks, pipeline_status, pipeline_status_lock
|
||||
)
|
||||
)
|
||||
|
|
@ -1940,7 +1940,7 @@ class LightRAG:
|
|||
pipeline_status["latest_message"] = log_message
|
||||
pipeline_status["history_messages"].append(log_message)
|
||||
|
||||
async def _process_entity_relation_graph(
|
||||
async def _process_extract_entities(
|
||||
self, chunk: dict[str, Any], pipeline_status=None, pipeline_status_lock=None
|
||||
) -> list:
|
||||
try:
|
||||
|
|
|
|||
101
lightrag/llm.py
101
lightrag/llm.py
|
|
@ -1,101 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Callable, Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Model(BaseModel):
|
||||
"""
|
||||
This is a Pydantic model class named 'Model' that is used to define a custom language model.
|
||||
|
||||
Attributes:
|
||||
gen_func (Callable[[Any], str]): A callable function that generates the response from the language model.
|
||||
The function should take any argument and return a string.
|
||||
kwargs (Dict[str, Any]): A dictionary that contains the arguments to pass to the callable function.
|
||||
This could include parameters such as the model name, API key, etc.
|
||||
|
||||
Example usage:
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_1"]})
|
||||
|
||||
In this example, 'openai_complete_if_cache' is the callable function that generates the response from the OpenAI model.
|
||||
The 'kwargs' dictionary contains the model name and API key to be passed to the function.
|
||||
"""
|
||||
|
||||
gen_func: Callable[[Any], str] = Field(
|
||||
...,
|
||||
description="A function that generates the response from the llm. The response must be a string",
|
||||
)
|
||||
kwargs: dict[str, Any] = Field(
|
||||
...,
|
||||
description="The arguments to pass to the callable function. Eg. the api key, model name, etc",
|
||||
)
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
|
||||
class MultiModel:
|
||||
"""
|
||||
Distributes the load across multiple language models. Useful for circumventing low rate limits with certain api providers especially if you are on the free tier.
|
||||
Could also be used for splitting across different models or providers.
|
||||
|
||||
Attributes:
|
||||
models (List[Model]): A list of language models to be used.
|
||||
|
||||
Usage example:
|
||||
```python
|
||||
models = [
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_1"]}),
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_2"]}),
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_3"]}),
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_4"]}),
|
||||
Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_5"]}),
|
||||
]
|
||||
multi_model = MultiModel(models)
|
||||
rag = LightRAG(
|
||||
llm_model_func=multi_model.llm_model_func
|
||||
/ ..other args
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, models: list[Model]):
|
||||
self._models = models
|
||||
self._current_model = 0
|
||||
|
||||
def _next_model(self):
|
||||
self._current_model = (self._current_model + 1) % len(self._models)
|
||||
return self._models[self._current_model]
|
||||
|
||||
async def llm_model_func(
|
||||
self,
|
||||
prompt: str,
|
||||
system_prompt: str | None = None,
|
||||
history_messages: list[dict[str, Any]] = [],
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
kwargs.pop("model", None) # stop from overwriting the custom model name
|
||||
kwargs.pop("keyword_extraction", None)
|
||||
kwargs.pop("mode", None)
|
||||
next_model = self._next_model()
|
||||
args = dict(
|
||||
prompt=prompt,
|
||||
system_prompt=system_prompt,
|
||||
history_messages=history_messages,
|
||||
**kwargs,
|
||||
**next_model.kwargs,
|
||||
)
|
||||
|
||||
return await next_model.gen_func(**args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
from lightrag.llm.openai import gpt_4o_mini_complete
|
||||
|
||||
result = await gpt_4o_mini_complete("How are you?")
|
||||
print(result)
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -679,19 +679,34 @@ async def _rebuild_knowledge_from_chunks(
|
|||
# Execute all tasks in parallel with semaphore control and early failure detection
|
||||
done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
|
||||
|
||||
# Check if any task raised an exception
|
||||
# Check if any task raised an exception and ensure all exceptions are retrieved
|
||||
first_exception = None
|
||||
|
||||
for task in done:
|
||||
if task.exception():
|
||||
# If a task failed, cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
try:
|
||||
exception = task.exception()
|
||||
if exception is not None:
|
||||
if first_exception is None:
|
||||
first_exception = exception
|
||||
else:
|
||||
# Task completed successfully, retrieve result to mark as processed
|
||||
task.result()
|
||||
except Exception as e:
|
||||
if first_exception is None:
|
||||
first_exception = e
|
||||
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# If any task failed, cancel all pending tasks and raise the first exception
|
||||
if first_exception is not None:
|
||||
# Cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
|
||||
# Re-raise the exception to notify the caller
|
||||
raise task.exception()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
|
||||
# Re-raise the first exception to notify the caller
|
||||
raise first_exception
|
||||
|
||||
# Final status report
|
||||
status_message = f"KG rebuild completed: {rebuilt_entities_count} entities and {rebuilt_relationships_count} relationships rebuilt successfully."
|
||||
|
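The rewritten wait/cancel logic above (and the same change repeated later in `merge_nodes_and_edges` and `extract_entities`) follows one pattern: wait with `FIRST_EXCEPTION`, retrieve every finished task's result or exception so nothing is left unretrieved, then cancel what is still pending and re-raise the first failure. A minimal hedged sketch of that pattern:

```python
# Minimal sketch of the FIRST_EXCEPTION handling pattern used in this diff.
import asyncio


async def run_until_first_failure(coros):
    tasks = [asyncio.create_task(c) for c in coros]
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)

    first_exception = None
    results = []
    for task in done:
        try:
            exc = task.exception()
            if exc is not None:
                # Remember only the first failure, but touch every exception so
                # asyncio never warns "exception was never retrieved".
                if first_exception is None:
                    first_exception = exc
            else:
                results.append(task.result())
        except Exception as e:  # defensive: mirror the diff if retrieval itself fails
            if first_exception is None:
                first_exception = e

    if first_exception is not None:
        for p in pending:
            p.cancel()
        if pending:
            await asyncio.wait(pending)  # let cancellation finish before re-raising
        raise first_exception

    return results


async def _demo():
    async def ok(i):
        await asyncio.sleep(0.01 * i)
        return i

    async def boom():
        await asyncio.sleep(0.02)
        raise RuntimeError("simulated failure")

    try:
        await run_until_first_failure([ok(1), ok(2), boom(), ok(50)])
    except RuntimeError as e:
        print("caught:", e)


if __name__ == "__main__":
    asyncio.run(_demo())
```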
|
@ -855,6 +870,10 @@ async def _process_extraction_result(
|
|||
record = record.replace("<|>>", "<|>")
|
||||
# fix <<|> with <|>
|
||||
record = record.replace("<<|>", "<|>")
|
||||
# fix <.|> with <|>
|
||||
record = record.replace("<.|>", "<|>")
|
||||
# fix <|.> with <|>
|
||||
record = record.replace("<|.>", "<|>")
|
||||
|
||||
record_attributes = split_string_by_multi_markers(record, [tuple_delimiter])
|
||||
|
||||
|
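The added `replace` calls normalize near-miss delimiters the model sometimes emits (`<|>>`, `<<|>`, `<.|>`, `<|.>`) back to the canonical `<|>` before the record is split. A compact hedged version of that clean-up; the helper name and the sample record are illustrative.

```python
# Hedged sketch of the delimiter clean-up added in _process_extraction_result.
MALFORMED_DELIMITERS = ["<|>>", "<<|>", "<.|>", "<|.>"]


def normalize_delimiters(record: str, canonical: str = "<|>") -> str:
    # Rewrite each malformed variant to the canonical tuple delimiter.
    for bad in MALFORMED_DELIMITERS:
        record = record.replace(bad, canonical)
    return record


raw = "(entity<<|>Apple<|.>Organization<.|>A fruit company<|>>)"
print(normalize_delimiters(raw))
# (entity<|>Apple<|>Organization<|>A fruit company<|>)
```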
|
@ -1619,17 +1638,32 @@ async def merge_nodes_and_edges(
|
|||
entity_tasks, return_when=asyncio.FIRST_EXCEPTION
|
||||
)
|
||||
|
||||
# Check if any task raised an exception
|
||||
# Check if any task raised an exception and ensure all exceptions are retrieved
|
||||
first_exception = None
|
||||
successful_results = []
|
||||
|
||||
for task in done:
|
||||
if task.exception():
|
||||
# If a task failed, cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# Re-raise the exception to notify the caller
|
||||
raise task.exception()
|
||||
try:
|
||||
exception = task.exception()
|
||||
if exception is not None:
|
||||
if first_exception is None:
|
||||
first_exception = exception
|
||||
else:
|
||||
successful_results.append(task.result())
|
||||
except Exception as e:
|
||||
if first_exception is None:
|
||||
first_exception = e
|
||||
|
||||
# If any task failed, cancel all pending tasks and raise the first exception
|
||||
if first_exception is not None:
|
||||
# Cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# Re-raise the first exception to notify the caller
|
||||
raise first_exception
|
||||
|
||||
# If all tasks completed successfully, collect results
|
||||
processed_entities = [task.result() for task in entity_tasks]
|
||||
|
|
@ -1737,17 +1771,32 @@ async def merge_nodes_and_edges(
|
|||
edge_tasks, return_when=asyncio.FIRST_EXCEPTION
|
||||
)
|
||||
|
||||
# Check if any task raised an exception
|
||||
# Check if any task raised an exception and ensure all exceptions are retrieved
|
||||
first_exception = None
|
||||
successful_results = []
|
||||
|
||||
for task in done:
|
||||
if task.exception():
|
||||
# If a task failed, cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# Re-raise the exception to notify the caller
|
||||
raise task.exception()
|
||||
try:
|
||||
exception = task.exception()
|
||||
if exception is not None:
|
||||
if first_exception is None:
|
||||
first_exception = exception
|
||||
else:
|
||||
successful_results.append(task.result())
|
||||
except Exception as e:
|
||||
if first_exception is None:
|
||||
first_exception = e
|
||||
|
||||
# If any task failed, cancel all pending tasks and raise the first exception
|
||||
if first_exception is not None:
|
||||
# Cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# Re-raise the first exception to notify the caller
|
||||
raise first_exception
|
||||
|
||||
# If all tasks completed successfully, collect results
|
||||
for task in edge_tasks:
|
||||
|
|
@ -2023,23 +2072,36 @@ async def extract_entities(
|
|||
# This allows us to cancel remaining tasks if any task fails
|
||||
done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
|
||||
|
||||
# Check if any task raised an exception
|
||||
# Check if any task raised an exception and ensure all exceptions are retrieved
|
||||
first_exception = None
|
||||
chunk_results = []
|
||||
|
||||
for task in done:
|
||||
if task.exception():
|
||||
# If a task failed, cancel all pending tasks
|
||||
# This prevents unnecessary processing since the parent function will abort anyway
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
try:
|
||||
exception = task.exception()
|
||||
if exception is not None:
|
||||
if first_exception is None:
|
||||
first_exception = exception
|
||||
else:
|
||||
chunk_results.append(task.result())
|
||||
except Exception as e:
|
||||
if first_exception is None:
|
||||
first_exception = e
|
||||
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
# If any task failed, cancel all pending tasks and raise the first exception
|
||||
if first_exception is not None:
|
||||
# Cancel all pending tasks
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
|
||||
# Re-raise the exception to notify the caller
|
||||
raise task.exception()
|
||||
# Wait for cancellation to complete
|
||||
if pending:
|
||||
await asyncio.wait(pending)
|
||||
|
||||
# If all tasks completed successfully, collect results
|
||||
chunk_results = [task.result() for task in tasks]
|
||||
# Re-raise the first exception to notify the caller
|
||||
raise first_exception
|
||||
|
||||
# If all tasks completed successfully, chunk_results already contains the results
|
||||
|
||||
# Return the chunk_results for later processing in merge_nodes_and_edges
|
||||
return chunk_results
|
||||
|
|
|
|||
|
|
@ -11,32 +11,33 @@ PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
|
|||
PROMPTS["DEFAULT_USER_PROMPT"] = "n/a"
|
||||
|
||||
PROMPTS["entity_extraction"] = """---Task---
|
||||
Given a text document and a list of entity types, identify all entities of those types and all relationships among the identified entities.
|
||||
|
||||
For a given text and a list of entity types, extract all entities and their relationships, then return them in the specified language and format described below.
|
||||
|
||||
---Instructions---
|
||||
1. Identify definitively conceptualized entities in the text. For each identified entity, extract the following information:
|
||||
- entity_name: Name of the entity, using the same language as the input text. If English, capitalize the name.
|
||||
- entity_type: Categorize the entity using the provided `Entity_types` list. If a suitable category cannot be determined, classify it as "Other".
|
||||
- entity_type: Categorize the entity using the provided `Entity_types` list. If a suitable category cannot be determined, classify it as `Other`.
|
||||
- entity_description: Provide a comprehensive description of the entity's attributes and activities based on the information present in the input text. To ensure clarity and precision, all descriptions must replace pronouns and referential terms (e.g., "this document," "our company," "I," "you," "he/she") with the specific nouns they represent.
|
||||
2. Format each entity as: ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
|
||||
3. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are directly and clearly related based on the text. Unsubstantiated relationships must be excluded from the output.
|
||||
For each pair of related entities, extract the following information:
|
||||
- source_entity: name of the source entity, as identified in step 1
|
||||
- target_entity: name of the target entity, as identified in step 1
|
||||
2. Format each entity as: (entity{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
|
||||
3. From the entities identified, identify all pairs of (source_entity, target_entity) that are directly and clearly related, and extract the following information:
|
||||
- source_entity: name of the source entity
|
||||
- target_entity: name of the target entity
|
||||
- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
|
||||
- relationship_description: Explain the nature of the relationship between the source and target entities, providing a clear rationale for their connection
|
||||
4. Format each relationship as: ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_description>)
|
||||
4. Format each relationship as: (relationship{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_description>)
|
||||
5. Use `{tuple_delimiter}` as field delimiter. Use `{record_delimiter}` as the entity or relation list delimiter.
|
||||
6. Return identified entities and relationships in {language}.
|
||||
7. Output `{completion_delimiter}` when all the entities and relationships are extracted.
|
||||
6. Output `{completion_delimiter}` when all the entities and relationships are extracted.
|
||||
7. Ensure the output language is {language}.
|
||||
|
||||
---Quality Guidelines---
|
||||
- Only extract entities that are clearly defined and meaningful in the context
|
||||
- Only extract entities and relationships that are clearly defined and meaningful in the context
|
||||
- Avoid over-interpretation; stick to what is explicitly stated in the text
|
||||
- For all output content, explicitly name the subject or object rather than using pronouns
|
||||
- Include specific numerical data in entity name when relevant
|
||||
- Ensure entity names are consistent throughout the extraction
|
||||
|
||||
|
||||
---Examples---
|
||||
{examples}
|
||||
|
||||
|
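For readers following the format rules above, the sketch below shows what a conforming extraction output might look like and how it could be split back into fields. The field delimiter `<|>` and the `<|COMPLETE|>` terminator appear in this diff; the `##` record delimiter and the sample content are assumptions for illustration only.

```python
# Illustration of the record format requested by the entity_extraction prompt.
sample_output = (
    "(entity<|>Tesla<|>Organization<|>Tesla is an electric-vehicle maker mentioned in the text)##"
    "(entity<|>Elon Musk<|>Person<|>Elon Musk leads Tesla according to the text)##"
    "(relationship<|>Elon Musk<|>Tesla<|>leadership<|>Elon Musk is described as leading Tesla)"
    "<|COMPLETE|>"
)

records = sample_output.removesuffix("<|COMPLETE|>").split("##")
for rec in records:
    fields = rec.strip("()").split("<|>")
    print(fields[0], "->", fields[1:])
```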
|
@ -50,6 +51,18 @@ Text:
|
|||
---Output---
|
||||
"""
|
||||
|
||||
PROMPTS["entity_continue_extraction"] = """---Task---
|
||||
Identify any missed entities or relationships in the last extraction task.
|
||||
|
||||
---Instructions---
|
||||
1. Output the entities and relationships in the same format as the previous extraction task.
|
||||
2. Do not include entities and relations that have been previously extracted.
|
||||
3. If the entity doesn't clearly fit into any of the `Entity_types` provided, classify it as "Other".
|
||||
4. Ensure the output language is {language}.
|
||||
|
||||
---Output---
|
||||
"""
|
||||
|
||||
PROMPTS["entity_extraction_examples"] = [
|
||||
"""[Example 1]
|
||||
|
||||
|
|
@ -181,28 +194,6 @@ Description List:
|
|||
---Output---
|
||||
"""
|
||||
|
||||
PROMPTS["entity_continue_extraction"] = """---Task---
|
||||
Identify any missed entities or relationships in the last extraction task.
|
||||
|
||||
---Instructions---
|
||||
1. Output the entities and relationships in the same format as the previous extraction task.
|
||||
2. Do not include entities and relations that have been previously extracted.
|
||||
3. If the entity doesn't clearly fit into any of the `Entity_types` provided, classify it as "Other".
|
||||
4. Return identified entities and relationships in {language}.
|
||||
5. Output `{completion_delimiter}` when all the entities and relationships are extracted.
|
||||
|
||||
---Output---
|
||||
"""
|
||||
|
||||
# TODO: Deprecated
|
||||
PROMPTS["entity_if_loop_extraction"] = """
|
||||
---Goal---
|
||||
|
||||
Check if it appears some entities may have still been missed. Output "Yes" if so, otherwise "No".
|
||||
|
||||
---Output---
|
||||
Output:"""
|
||||
|
||||
PROMPTS["fail_response"] = (
|
||||
"Sorry, I'm not able to provide an answer to that question.[no-context]"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1896,6 +1896,12 @@ def normalize_extracted_info(name: str, remove_inner_quotes=False) -> str:
|
|||
if "‘" not in inner_content and "’" not in inner_content:
|
||||
name = inner_content
|
||||
|
||||
# Handle Chinese-style book title mark
|
||||
if name.startswith("《") and name.endswith("》"):
|
||||
inner_content = name[1:-1]
|
||||
if "《" not in inner_content and "》" not in inner_content:
|
||||
name = inner_content
|
||||
|
||||
if remove_inner_quotes:
|
||||
# Remove Chinese quotes
|
||||
name = name.replace("“", "").replace("”", "").replace("‘", "").replace("’", "")
|
||||
|
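The new branch above strips Chinese book-title marks the same way the existing quote handling does: only when the marks wrap the whole name and do not also appear inside it. A tiny hedged check of just that branch:

```python
# Standalone sketch of the 《》 handling added to normalize_extracted_info.
def strip_book_title_marks(name: str) -> str:
    if name.startswith("《") and name.endswith("》"):
        inner = name[1:-1]
        # Only unwrap when the marks are a simple outer pair.
        if "《" not in inner and "》" not in inner:
            return inner
    return name


print(strip_book_title_marks("《三体》"))        # 三体
print(strip_book_title_marks("《甲》和《乙》"))  # unchanged: inner marks present
```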
|
@ -1995,8 +2001,8 @@ def sanitize_text_for_encoding(text: str, replacement_char: str = "") -> str:
|
|||
# Unescape HTML escapes
|
||||
sanitized = html.unescape(sanitized)
|
||||
|
||||
# Remove control characters
|
||||
sanitized = re.sub(r"[\x00-\x1f\x7f-\x9f]", "", sanitized)
|
||||
# Remove control characters but preserve common whitespace (\t, \n, \r)
|
||||
sanitized = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]", "", sanitized)
|
||||
|
||||
return sanitized.strip()
|
||||
|
||||
|
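The tightened regex above drops C0/C1 control characters but deliberately keeps `\t` (0x09), `\n` (0x0A) and `\r` (0x0D). A small hedged check of that behaviour, covering only the regex step of `sanitize_text_for_encoding`:

```python
# Focused sketch of the control-character filtering change:
# strip C0/C1 controls but keep tab, newline and carriage return.
import re

_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]")


def strip_control_chars(text: str) -> str:
    return _CONTROL_CHARS.sub("", text)


sample = "line1\nline2\tcolumn\x00\x1b[31mred\x07"
print(repr(strip_control_chars(sample)))
# 'line1\nline2\tcolumn[31mred'  -- newline and tab survive, NUL/ESC/BEL removed
```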
|
|
|||
|
|
@ -25,7 +25,7 @@ dependencies = [
|
|||
"configparser",
|
||||
"dotenv",
|
||||
"future",
|
||||
"json-repair",
|
||||
"json_repair",
|
||||
"nano-vectordb",
|
||||
"networkx",
|
||||
"numpy",
|
||||
|
|
@ -47,6 +47,9 @@ api = [
|
|||
"configparser",
|
||||
"dotenv",
|
||||
"future",
|
||||
"json_repair",
|
||||
"nano-vectordb",
|
||||
"networkx",
|
||||
"numpy",
|
||||
"openai",
|
||||
"pandas>=2.0.0",
|
||||
|
|
|
|||