refactor: Rename summary_max_tokens to summary_context_size; add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
parent 91767ffcee
commit de2daf6565

10 changed files with 72 additions and 33 deletions
@@ -268,7 +268,8 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name used for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM when generating entity/relation summaries | `30000` (set by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM when merging entity/relation summaries | `10000` (set by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token length of a merged entity/relation description | `500` (set by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed by env var COSINE_THRESHOLD) |
@@ -598,9 +599,9 @@ if __name__ == "__main__":
To improve retrieval quality, documents can be reranked with a more effective relevance-scoring model. The `rerank.py` file provides driver functions for three reranker providers:

* **Cohere / vLLM**: `cohere_rerank`
* **Jina AI**: `jina_rerank`
* **Aliyun**: `ali_rerank`

You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object; LightRAG's query functions will then use it to rerank retrieved text chunks, as sketched below. See `examples/rerank_example.py` for detailed usage.
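For illustration, a minimal injection sketch. It assumes the drivers are importable from `lightrag.rerank` and accept an `api_key` keyword, and that `rag` is an already-initialized LightRAG object; `examples/rerank_example.py` remains the authoritative reference.

```python
# Sketch: inject a reranker into an existing LightRAG instance `rag`.
# The lightrag.rerank import path and api_key keyword are assumptions.
import asyncio
from functools import partial

from lightrag.rerank import jina_rerank

rag.rerank_model_func = partial(jina_rerank, api_key="YOUR_JINA_API_KEY")

async def demo() -> None:
    # Query results are now reranked by the injected function.
    print(await rag.aquery("Which entities relate to X?"))

asyncio.run(demo())
```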
@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM to generate entity/relation summaries | `30000` (configured by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM when generating summaries for entity/relation merging | `10000` (configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size of an entity/relation description | `500` (configured by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, like setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed by env var COSINE_THRESHOLD) |
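To make the renamed parameters concrete, here is a minimal initialization sketch. The OpenAI helper imports and the `initialize_storages()` startup step are assumptions based on this repository's examples; adapt them to your own bindings.

```python
# Minimal sketch: constructing LightRAG with the renamed summary parameters.
# The helper imports are assumptions based on this repository's examples.
import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed

async def main() -> None:
    rag = LightRAG(
        working_dir="./rag_storage",
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        summary_max_tokens=500,      # cap on a merged entity/relation description
        summary_context_size=10000,  # cap on context sent to the LLM when merging
    )
    await rag.initialize_storages()

asyncio.run(main())
```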
env.example (11 changes)
@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100

 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
 # FORCE_LLM_SUMMARY_ON_MERGE=4
-# SUMMARY_MAX_TOKENS=30000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1
+### Number of tokens to trigger LLM summary on entity/relation merge
+# SUMMARY_MAX_TOKENS=500
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=10000

 ###############################
 ### Concurrency Configuration
@@ -30,6 +30,7 @@ from lightrag.constants import (
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
         help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
-        "--max-tokens",
+        "--summary-max-tokens",
         type=int,
         default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
-        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+        help=f"Maximum token size for entity/relation summary (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
     )
+    parser.add_argument(
+        "--summary-context-size",
+        type=int,
+        default=get_env_value(
+            "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
+        ),
+        help=f"LLM summary context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
+    )

     # Logging configuration
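For illustration, the renamed flags exercised through `parse_args`. The exact module housing `parse_args` is an assumption here; adjust the import to wherever it lives in this repo.

```python
# Hypothetical harness for the renamed CLI flags; the import path is an
# assumption, parse_args itself is the function shown in the hunk above.
import sys

from lightrag.api.config import parse_args  # import path is an assumption

sys.argv = [
    "lightrag-server",
    "--summary-max-tokens", "500",
    "--summary-context-size", "10000",
]
args = parse_args()
assert args.summary_max_tokens == 500
assert args.summary_context_size == 10000
```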
@@ -2,7 +2,7 @@
 LightRAG FastAPI Server
 """

-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException
 import asyncio
 import os
 import logging
@@ -472,7 +472,8 @@ def create_app(args):
             ),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs=(
@@ -510,7 +511,8 @@ def create_app(args):
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
@@ -598,7 +600,7 @@ def create_app(args):
         username = form_data.username
         if auth_handler.accounts.get(username) != form_data.password:
             raise HTTPException(
-                status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
+                status_code=401, detail="Incorrect credentials"
             )

         # Regular user login
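For context: FastAPI's `status.HTTP_401_UNAUTHORIZED` is just the integer constant `401` re-exported from Starlette, so switching to the literal is behavior-preserving, and it is what lets the now-unused `status` import be dropped in the earlier hunk.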
@@ -642,7 +644,8 @@ def create_app(args):
             "embedding_binding": args.embedding_binding,
             "embedding_binding_host": args.embedding_binding_host,
             "embedding_model": args.embedding_model,
-            "max_tokens": args.max_tokens,
+            "summary_max_tokens": args.summary_max_tokens,
+            "summary_context_size": args.summary_context_size,
             "kv_storage": args.kv_storage,
             "doc_status_storage": args.doc_status_storage,
             "graph_storage": args.graph_storage,
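A hedged way to confirm the health endpoint now reports the renamed keys, assuming the default port and that the config dict shown in the diff is nested under a "configuration" key in the response:

```python
# Check that /health reports summary_max_tokens and summary_context_size.
# Port 9621 and the "configuration" nesting are assumptions.
import json
from urllib.request import urlopen

with urlopen("http://localhost:9621/health") as resp:
    payload = json.load(resp)

config = payload.get("configuration", payload)  # tolerate either shape
print(config["summary_max_tokens"], config["summary_context_size"])
```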
@@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_model}")
     ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
     ASCIIColors.yellow(f"{args.max_async}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.white(" ├─ Summary Context Size: ", end="")
+    ASCIIColors.yellow(f"{args.summary_context_size}")
     ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
     ASCIIColors.yellow(f"{args.enable_llm_cache}")
     ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
@@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000

 # Default values for extraction settings
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
-DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
 DEFAULT_MAX_GLEANING = 1
-DEFAULT_SUMMARY_MAX_TOKENS = 30000  # Default maximum token size
+
+DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
+DEFAULT_SUMMARY_MAX_TOKENS = 500  # Max token size for entity/relation summary
+DEFAULT_SUMMARY_CONTEXT_SIZE = 10000  # Max context size sent to LLM for summarization

 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"
@@ -34,6 +34,7 @@ from lightrag.constants import (
     DEFAULT_KG_CHUNK_PICK_METHOD,
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_MAX_PARALLEL_INSERT,
     DEFAULT_MAX_GRAPH_NODES,
@@ -285,6 +286,11 @@ class LightRAG:
     summary_max_tokens: int = field(
         default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
     )
     """Maximum tokens allowed for entity/relation description."""

+    summary_context_size: int = field(
+        default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
+    )
+    """Maximum context size (in tokens) sent to the LLM when summarizing descriptions."""
+
     llm_model_max_async: int = field(
@@ -416,6 +422,21 @@ class LightRAG:
         if self.ollama_server_infos is None:
             self.ollama_server_infos = OllamaServerInfos()

+        # Validate summary configuration
+        if self.force_llm_summary_on_merge < 3:
+            logger.warning(
+                f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}"
+            )
+        if self.summary_max_tokens * self.force_llm_summary_on_merge > self.summary_context_size:
+            logger.warning(
+                f"summary_context_size should be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}"
+            )
+        if self.summary_context_size > self.max_total_tokens:
+            logger.warning(
+                f"summary_context_size should be less than max_total_tokens, got {self.summary_context_size}"
+            )
+
         # Fix global_config now
         global_config = asdict(self)
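With the new defaults these checks pass comfortably; here is a standalone restatement (the `max_total_tokens` value is an assumption for illustration):

```python
# Restating the three validation constraints with the defaults from
# lightrag/constants.py; max_total_tokens is assumed to be 32000 here.
force_llm_summary_on_merge = 4   # DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE
summary_max_tokens = 500         # DEFAULT_SUMMARY_MAX_TOKENS
summary_context_size = 10000     # DEFAULT_SUMMARY_CONTEXT_SIZE
max_total_tokens = 32000         # assumption for illustration

assert force_llm_summary_on_merge >= 3
# Worst case: force_llm_summary_on_merge descriptions, each at the
# summary_max_tokens cap, must fit in the summary context (2000 <= 10000).
assert summary_max_tokens * force_llm_summary_on_merge <= summary_context_size
# The summary context must in turn fit in the model's total token budget.
assert summary_context_size <= max_total_tokens
```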
@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
     """Handle entity relation description summary using map-reduce approach.

     This function summarizes a list of descriptions using a map-reduce strategy:
-    1. If total tokens <= summary_max_tokens, summarize directly
-    2. Otherwise, split descriptions into chunks that fit within token limits
-    3. Summarize each chunk, then recursively process the summaries
-    4. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge
+    1. If total tokens < summary_max_tokens and len(description_list) < force_llm_summary_on_merge, join the descriptions as-is (no LLM call needed)
+    2. If total tokens <= summary_context_size, summarize with the LLM directly
+    3. Otherwise, split descriptions into chunks that fit within token limits
+    4. Summarize each chunk, then recursively process the summaries
+    5. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge

     Args:
         entity_or_relation_name: Name of the entity or relation being summarized
@@ -148,6 +149,7 @@ async def _handle_entity_relation_summary(

     # Get configuration
     tokenizer: Tokenizer = global_config["tokenizer"]
+    summary_context_size = global_config["summary_context_size"]
     summary_max_tokens = global_config["summary_max_tokens"]

     current_list = description_list[:]  # Copy the list to avoid modifying original
@@ -158,11 +160,11 @@ async def _handle_entity_relation_summary(
         total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)

         # If total length is within limits, perform final summarization
-        if (
-            total_tokens <= summary_max_tokens
-            or len(current_list) < force_llm_summary_on_merge
-        ):
-            if len(current_list) < force_llm_summary_on_merge:
+        if total_tokens <= summary_context_size:
+            if (
+                len(current_list) < force_llm_summary_on_merge
+                and total_tokens < summary_max_tokens
+            ):
                 # Already the final result
                 final_description = seperator.join(current_list)
                 return final_description if final_description else ""
@@ -184,9 +186,9 @@ async def _handle_entity_relation_summary(
             desc_tokens = len(tokenizer.encode(desc))

             # If adding current description would exceed limit, finalize current chunk
-            if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
+            if current_tokens + desc_tokens > summary_context_size and current_chunk:
                 chunks.append(current_chunk)
-                current_chunk = [desc]
+                current_chunk = [desc]  # Initial chunk for next group
                 current_tokens = desc_tokens
             else:
                 current_chunk.append(desc)
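Read together, the operate.py hunks implement the loop below. This is a condensed, synchronous sketch (tokenizer and LLM call stubbed out), not the actual async implementation; parameter names mirror the diff above.

```python
# Condensed sketch of the map-reduce flow in _handle_entity_relation_summary
# after this commit. count_tokens and llm_summarize are stand-ins.
def count_tokens(text: str) -> int:
    return len(text.split())  # stand-in for tokenizer.encode

def llm_summarize(descriptions: list[str]) -> str:
    return " ".join(descriptions)[:200]  # stand-in for the async LLM call

def summarize_descriptions(
    descriptions: list[str],
    summary_max_tokens: int = 500,
    summary_context_size: int = 10000,
    force_llm_summary_on_merge: int = 4,
    separator: str = "<SEP>",
) -> str:
    current = descriptions[:]
    while True:
        total = sum(count_tokens(d) for d in current)
        if total <= summary_context_size:
            # Few, short fragments: joining them is already the final result.
            if len(current) < force_llm_summary_on_merge and total < summary_max_tokens:
                return separator.join(current)
            return llm_summarize(current)  # reduce step: one LLM summary
        # Map step: greedily pack descriptions into context-sized chunks.
        chunks: list[list[str]] = []
        chunk: list[str] = []
        tokens = 0
        for desc in current:
            t = count_tokens(desc)
            if tokens + t > summary_context_size and chunk:
                chunks.append(chunk)
                chunk, tokens = [desc], t  # initial chunk for next group
            else:
                chunk.append(desc)
                tokens += t
        if chunk:
            chunks.append(chunk)
        current = [llm_summarize(c) for c in chunks]  # recurse on summaries
```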
@@ -35,7 +35,6 @@ export type LightragStatus = {
   embedding_binding: string
   embedding_binding_host: string
   embedding_model: string
-  max_tokens: number
   kv_storage: string
   doc_status_storage: string
   graph_storage: string