From de2daf65653ce0d06a128aecc8b3faf492c1f74b Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 26 Aug 2025 01:35:50 +0800
Subject: [PATCH] refac: Rename summary_max_tokens to summary_context_size,
 add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
---
 README-zh.md                       |  9 +++++----
 README.md                          |  3 ++-
 env.example                        | 11 ++++++-----
 lightrag/api/config.py             | 13 +++++++++++--
 lightrag/api/lightrag_server.py    | 13 ++++++++-----
 lightrag/api/utils_api.py          |  4 ++--
 lightrag/constants.py              |  6 ++++--
 lightrag/lightrag.py               | 21 +++++++++++++++++++++
 lightrag/operate.py                | 24 +++++++++++++-----------
 lightrag_webui/src/api/lightrag.ts |  1 -
 10 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index 8b7239ec..d3403b35 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -268,7 +268,8 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
 | **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `30000`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
+| **summary_context_size** | `int` | 合并实体关系摘要时送给LLM的最大令牌数 | `10000`(由环境变量 SUMMARY_CONTEXT_SIZE 设置) |
+| **summary_max_tokens** | `int` | 合并实体关系描述的最大令牌数 | `500`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
 | **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
 | **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
 | **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
@@ -598,9 +599,9 @@ if __name__ == "__main__":
 
 为了提高检索质量,可以根据更有效的相关性评分模型对文档进行重排序。`rerank.py`文件提供了三个Reranker提供商的驱动函数:
 
-* **Cohere / vLLM**: `cohere_rerank` 
-* **Jina AI**: `jina_rerank` 
-* **Aliyun阿里云**: `ali_rerank` 
+* **Cohere / vLLM**: `cohere_rerank`
+* **Jina AI**: `jina_rerank`
+* **Aliyun阿里云**: `ali_rerank`
 
 您可以将这些函数之一注入到LightRAG对象的`rerank_model_func`属性中。这将使LightRAG的查询功能能够使用注入的函数对检索到的文本块进行重新排序。有关详细用法,请参阅`examples/rerank_example.py`文件。

diff --git a/README.md b/README.md
index eacb4982..5ad37f01 100644
--- a/README.md
+++ b/README.md
@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens send to LLM to generate entity relation summaries | `30000`(configured by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to LLM when generating summaries for entity/relation merging | `10000`(configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size of an entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
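The README tables above rename the init parameters but show no call site. A minimal usage sketch follows; the `gpt_4o_mini_complete` import path is an assumption, and the embedding setup that a real LightRAG instance needs is omitted for brevity:

```python
# Sketch only: constructor arguments as documented in the README table above.
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete  # assumed import path

rag = LightRAG(
    working_dir="./rag_storage",  # embedding_func etc. omitted for brevity
    llm_model_func=gpt_4o_mini_complete,
    summary_context_size=10000,  # max tokens sent to LLM per summary request
    summary_max_tokens=500,      # max tokens kept per entity/relation description
)
```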
diff --git a/env.example b/env.example
index 41c77ede..a824a1f5 100644
--- a/env.example
+++ b/env.example
@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
-### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommented), and max tokens send to LLM
+
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
 # FORCE_LLM_SUMMARY_ON_MERGE=4
-# SUMMARY_MAX_TOKENS=30000
-### Maximum number of entity extraction attempts for ambiguous content
-# MAX_GLEANING=1
+### Max token size of merged entity/relation descriptions to trigger LLM summary
+# SUMMARY_MAX_TOKENS=500
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=10000
 
 ###############################
 ### Concurrency Configuration

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index a5e352dc..f4a281a7 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -30,6 +30,7 @@ from lightrag.constants import (
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
         help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
-        "--max-tokens",
+        "--summary-max-tokens",
         type=int,
         default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
-        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+        help=f"Maximum token size for entity/relation summary (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+    )
+    parser.add_argument(
+        "--summary-context-size",
+        type=int,
+        default=get_env_value(
+            "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
+        ),
+        help=f"LLM summary context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
     )
 
     # Logging configuration

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index ec1d38d5..2cb53fcd 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -2,7 +2,7 @@
 LightRAG FastAPI Server
 """
 
-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException
 import asyncio
 import os
 import logging
@@ -472,7 +472,8 @@ def create_app(args):
             ),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs=(
@@ -510,7 +511,8 @@
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
@@ -598,7 +600,7 @@ def create_app(args):
             username = form_data.username
             if auth_handler.accounts.get(username) != form_data.password:
                 raise HTTPException(
-                    status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
+                    status_code=401, detail="Incorrect credentials"
                 )
 
             # Regular user login
@@ -642,7 +644,8 @@
         "embedding_binding": args.embedding_binding,
         "embedding_binding_host": args.embedding_binding_host,
         "embedding_model": args.embedding_model,
-        "max_tokens": args.max_tokens,
+        "summary_max_tokens": args.summary_max_tokens,
+        "summary_context_size": args.summary_context_size,
         "kv_storage": args.kv_storage,
         "doc_status_storage": args.doc_status_storage,
         "graph_storage": args.graph_storage,
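Both new flags resolve their defaults through `get_env_value`, so the precedence is CLI flag, then environment variable, then constant. A simplified stand-in for that helper (the real implementation lives in `lightrag/api/config.py`; this version only mirrors the three-argument calls used above):

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")


def get_env_value(name: str, default: T, cast: Callable[[str], T] = str) -> T:
    """Simplified stand-in: read an env var and cast it, else use the default."""
    raw = os.environ.get(name)
    return cast(raw) if raw is not None else default


# Mirrors the argparse defaults added in this patch:
summary_max_tokens = get_env_value("SUMMARY_MAX_TOKENS", 500, int)
summary_context_size = get_env_value("SUMMARY_CONTEXT_SIZE", 10000, int)
```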
detail="Incorrect credentials" ) # Regular user login @@ -642,7 +644,8 @@ def create_app(args): "embedding_binding": args.embedding_binding, "embedding_binding_host": args.embedding_binding_host, "embedding_model": args.embedding_model, - "max_tokens": args.max_tokens, + "summary_max_tokens": args.summary_max_tokens, + "summary_context_size": args.summary_context_size, "kv_storage": args.kv_storage, "doc_status_storage": args.doc_status_storage, "graph_storage": args.graph_storage, diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index fc05716c..a53f8bee 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.llm_model}") ASCIIColors.white(" ├─ Max Async for LLM: ", end="") ASCIIColors.yellow(f"{args.max_async}") - ASCIIColors.white(" ├─ Max Tokens: ", end="") - ASCIIColors.yellow(f"{args.max_tokens}") + ASCIIColors.white(" ├─ Summary Context Size: ", end="") + ASCIIColors.yellow(f"{args.summary_context_size}") ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="") ASCIIColors.yellow(f"{args.enable_llm_cache}") ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="") diff --git a/lightrag/constants.py b/lightrag/constants.py index 9445872e..c180e2dd 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000 # Default values for extraction settings DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries -DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 DEFAULT_MAX_GLEANING = 1 -DEFAULT_SUMMARY_MAX_TOKENS = 30000 # Default maximum token size + +DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 +DEFAULT_SUMMARY_MAX_TOKENS = 500 # Max token size for entity/relation summary +DEFAULT_SUMMARY_CONTEXT_SIZE = 10000 # Default maximum token size # Separator for graph fields GRAPH_FIELD_SEP = "" diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index fa529784..1d8c08ed 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -34,6 +34,7 @@ from lightrag.constants import ( DEFAULT_KG_CHUNK_PICK_METHOD, DEFAULT_MIN_RERANK_SCORE, DEFAULT_SUMMARY_MAX_TOKENS, + DEFAULT_SUMMARY_CONTEXT_SIZE, DEFAULT_MAX_ASYNC, DEFAULT_MAX_PARALLEL_INSERT, DEFAULT_MAX_GRAPH_NODES, @@ -285,6 +286,11 @@ class LightRAG: summary_max_tokens: int = field( default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS)) ) + """Maximum tokens allowed for entity/relation description.""" + + summary_context_size: int = field( + default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE)) + ) """Maximum number of tokens allowed per LLM response.""" llm_model_max_async: int = field( @@ -416,6 +422,21 @@ class LightRAG: if self.ollama_server_infos is None: self.ollama_server_infos = OllamaServerInfos() + + # Validate config + if self.force_llm_summary_on_merge < 3: + logger.warning( + f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}" + ) + if self.summary_max_tokens * self.force_llm_summary_on_merge > self.summary_context_size: + logger.warning( + f"summary_context_size must be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}" + ) + if self.summary_context_size > self.max_total_tokens: + logger.warning( + f"summary_context_size must be less than max_total_tokens, got {self.summary_context_size}" + ) + # Fix global_config now global_config = asdict(self) diff --git a/lightrag/operate.py b/lightrag/operate.py index 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 5ad573cc..e59e944d 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
     """Handle entity relation description summary using map-reduce approach.
 
     This function summarizes a list of descriptions using a map-reduce strategy:
-    1. If total tokens <= summary_max_tokens, summarize directly
-    2. Otherwise, split descriptions into chunks that fit within token limits
-    3. Summarize each chunk, then recursively process the summaries
-    4. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
+    1. If total tokens < summary_max_tokens and len(description_list) < force_llm_summary_on_merge, no need to summarize
+    2. If total tokens <= summary_context_size, summarize with LLM directly
+    3. Otherwise, split descriptions into chunks that fit within token limits
+    4. Summarize each chunk, then recursively process the summaries
+    5. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge
 
     Args:
         entity_or_relation_name: Name of the entity or relation being summarized
@@ -148,6 +149,7 @@
 
     # Get configuration
     tokenizer: Tokenizer = global_config["tokenizer"]
+    summary_context_size = global_config["summary_context_size"]
     summary_max_tokens = global_config["summary_max_tokens"]
 
     current_list = description_list[:]  # Copy the list to avoid modifying original
@@ -158,11 +160,11 @@
         total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)
 
         # If total length is within limits, perform final summarization
-        if (
-            total_tokens <= summary_max_tokens
-            or len(current_list) < force_llm_summary_on_merge
-        ):
-            if len(current_list) < force_llm_summary_on_merge:
+        if total_tokens <= summary_context_size:
+            if (
+                len(current_list) < force_llm_summary_on_merge
+                and total_tokens < summary_max_tokens
+            ):
                 # Already the final result
                 final_description = seperator.join(current_list)
                 return final_description if final_description else ""
@@ -184,9 +186,9 @@
             desc_tokens = len(tokenizer.encode(desc))
 
             # If adding current description would exceed limit, finalize current chunk
-            if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
+            if current_tokens + desc_tokens > summary_context_size and current_chunk:
                 chunks.append(current_chunk)
-                current_chunk = [desc]
+                current_chunk = [desc]  # Initial chunk for next group
                 current_tokens = desc_tokens
             else:
                 current_chunk.append(desc)

diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts
index d2f23f12..265126c7 100644
--- a/lightrag_webui/src/api/lightrag.ts
+++ b/lightrag_webui/src/api/lightrag.ts
@@ -35,7 +35,6 @@ export type LightragStatus = {
   embedding_binding: string
   embedding_binding_host: string
   embedding_model: string
-  max_tokens: number
   kv_storage: string
   doc_status_storage: string
   graph_storage: string
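The chunk-splitting loop changed above is the map step of the summarization strategy. A self-contained sketch of the same greedy packing, with whitespace token counts standing in for the repo's `Tokenizer` (function name and tokenizer are stand-ins, not patch code):

```python
def split_by_token_budget(descriptions: list[str], budget: int) -> list[list[str]]:
    """Greedy packing as in _handle_entity_relation_summary: start a new
    chunk whenever adding the next description would exceed the budget."""
    chunks: list[list[str]] = []
    current_chunk: list[str] = []
    current_tokens = 0
    for desc in descriptions:
        desc_tokens = len(desc.split())  # stand-in for tokenizer.encode()
        if current_tokens + desc_tokens > budget and current_chunk:
            chunks.append(current_chunk)
            current_chunk = [desc]  # initial chunk for the next group
            current_tokens = desc_tokens
        else:
            current_chunk.append(desc)
            current_tokens += desc_tokens
    if current_chunk:
        chunks.append(current_chunk)
    return chunks


# Each chunk is summarized by the LLM, and the summaries are re-packed until
# the result fits within summary_context_size:
print(split_by_token_budget(["a b c", "d e", "f g h i"], budget=5))
# -> [['a b c', 'd e'], ['f g h i']]
```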