From 62cdc7d7eba7d5126ce107672a56c4c43b13dafd Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 21 Aug 2025 13:59:14 +0800
Subject: [PATCH] Update documentation with LLM selection guidelines and API improvements

---
 README-zh.md              | 2 ++
 README.md                 | 6 ++++++
 lightrag/api/README-zh.md | 5 ++++-
 lightrag/api/README.md    | 4 +++-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index 5caefc89..549c27a9 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -142,6 +142,8 @@ LightRAG对大型语言模型(LLM)的能力要求远高于传统RAG,因为
 - **LLM选型**:
   - 推荐选用参数量至少为32B的LLM。
   - 上下文长度至少为32KB,推荐达到64KB。
+  - 在文档索引阶段不建议选择推理模型。
+  - 在查询阶段建议选择比索引阶段能力更强的模型,以达到更高的查询效果。
 - **Embedding模型**:
   - 高性能的Embedding模型对RAG至关重要。
   - 推荐使用主流的多语言Embedding模型,例如:BAAI/bge-m3 和 text-embedding-3-large。
diff --git a/README.md b/README.md
index 51c68a1a..892ec6d0 100644
--- a/README.md
+++ b/README.md
@@ -141,6 +141,8 @@ LightRAG's demands on the capabilities of Large Language Models (LLMs) are signi
 - **LLM Selection**:
   - It is recommended to use an LLM with at least 32 billion parameters.
   - The context length should be at least 32KB, with 64KB being recommended.
+  - It is not recommended to choose reasoning models during the document indexing stage.
+  - During the query stage, it is recommended to choose models with stronger capabilities than those used in the indexing stage to achieve better query results.
 - **Embedding Model**:
   - A high-performance Embedding model is essential for RAG.
   - We recommend using mainstream multilingual Embedding models, such as: `BAAI/bge-m3` and `text-embedding-3-large`.
@@ -1287,8 +1289,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
             ),
         )
     )
+
     # Initialize storage (this will load existing data if available)
     await lightrag_instance.initialize_storages()
+
     # Now initialize RAGAnything with the existing LightRAG instance
     rag = RAGAnything(
         lightrag=lightrag_instance,  # Pass the existing LightRAG instance
@@ -1317,12 +1321,14 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
         )
         # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
     )
+
     # Query the existing knowledge base
     result = await rag.query_with_multimodal(
         "What data has been processed in this LightRAG instance?",
         mode="hybrid"
     )
     print("Query result:", result)
+
     # Add new multimodal documents to the existing LightRAG instance
     await rag.process_document_complete(
         file_path="path/to/new/multimodal_document.pdf",
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index 286b78b9..bc6352ec 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -357,7 +357,7 @@ API 服务器可以通过三种方式配置(优先级从高到低):
 LightRAG 支持绑定到各种 LLM/嵌入后端:
 
 * ollama
-* openai 和 openai 兼容
+* openai (含openai 兼容)
 * azure_openai
 * lollms
 * aws_bedrock
@@ -372,7 +372,10 @@
 lightrag-server --llm-binding ollama --help
 lightrag-server --embedding-binding ollama --help
 ```
 
+> 请使用openai兼容方式访问OpenRouter或vLLM部署的LLM。可以通过 `OPENAI_LLM_EXTRA_BODY` 环境变量给OpenRouter或vLLM传递额外的参数,实现推理模式的关闭或者其它个性化控制。
+
 ### 实体提取配置
+
 * ENABLE_LLM_CACHE_FOR_EXTRACT:为实体提取启用 LLM 缓存(默认:true)
 在测试环境中将 `ENABLE_LLM_CACHE_FOR_EXTRACT` 设置为 true 以减少 LLM 调用成本是很常见的做法。
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 8b4f239a..9329a1af 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -360,7 +360,7 @@ Most of the configurations come with default settings; check out the details in
 LightRAG supports binding to various LLM/Embedding backends:
 
 * ollama
-* openai & openai compatible
+* openai (including openai compatible)
 * azure_openai
 * lollms
 * aws_bedrock
@@ -374,6 +374,8 @@
 lightrag-server --llm-binding ollama --help
 lightrag-server --embedding-binding ollama --help
 ```
 
+> Please use the OpenAI-compatible method to access LLMs deployed by OpenRouter or vLLM. You can pass additional parameters to OpenRouter or vLLM through the `OPENAI_LLM_EXTRA_BODY` environment variable, for example to disable reasoning mode or apply other custom controls.
+
 ### Entity Extraction Configuration
 * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
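
The indexing-versus-query guidance added above can be applied to the API server through its environment configuration. A minimal `.env` sketch, assuming the `LLM_BINDING`/`LLM_BINDING_HOST`/`LLM_MODEL` variables documented in `lightrag/api/README.md`; the host and model names are placeholders, not values from this patch:

```bash
# Indexing phase: a capable, non-reasoning instruct model (example name only)
LLM_BINDING=openai
LLM_BINDING_HOST=https://api.example.com/v1
LLM_MODEL=qwen2.5-32b-instruct

# Query phase: switch to a stronger model, then restart lightrag-server
# LLM_MODEL=qwen2.5-72b-instruct
```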
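For the `OPENAI_LLM_EXTRA_BODY` note above, a minimal sketch of disabling reasoning on a vLLM-served model, assuming the variable takes a JSON object merged into each chat-completion request body; the endpoint and the `chat_template_kwargs` payload are backend-specific examples, not values fixed by this patch:

```bash
# OpenAI-compatible binding pointed at a vLLM (or OpenRouter) endpoint
LLM_BINDING=openai
LLM_BINDING_HOST=http://localhost:8000/v1
LLM_BINDING_API_KEY=your-api-key

# Extra request-body fields forwarded to the backend (JSON).
# Example: turn off "thinking"/reasoning mode for Qwen3-style models on vLLM.
OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
```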