refactor: Rename summary_max_tokens to summary_context_size; add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
yangdx 2025-08-26 01:35:50 +08:00
parent 91767ffcee
commit de2daf6565
10 changed files with 72 additions and 33 deletions

View file

@@ -268,7 +268,8 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM when generating entity/relation summaries | `30000` (set via env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM when merging entity/relation summaries | `10000` (set via env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token length of a merged entity/relation description | `500` (set via env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed via env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed via env var COSINE_THRESHOLD) |

@@ -598,9 +599,9 @@ if __name__ == "__main__":
 To improve retrieval quality, documents can be reranked with a more effective relevance scoring model. The `rerank.py` file provides driver functions for three reranker providers:
 * **Cohere / vLLM**: `cohere_rerank`
 * **Jina AI**: `jina_rerank`
 * **Aliyun (Alibaba Cloud)**: `ali_rerank`
 You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object. LightRAG's query functions will then use the injected function to rerank retrieved text chunks. See `examples/rerank_example.py` for detailed usage.

View file

@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM to generate entity/relation summaries | `30000` (configured by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM to generate summaries for entity/relation merging | `10000` (configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size of an entity/relation description | `500` (configured by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2 (default value changed by env var COSINE_THRESHOLD) |
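
After this commit the two settings play distinct roles: summary_context_size bounds the input context handed to the LLM when merging descriptions, while summary_max_tokens bounds the length of each merged description. A minimal init sketch with the renamed parameters (values are the new defaults; the OpenAI binding from lightrag.llm.openai is assumed, and embedding/storage setup is omitted):

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete

rag = LightRAG(
    working_dir="./rag_storage",
    llm_model_func=gpt_4o_mini_complete,
    # Cap on tokens sent to the LLM per merge/summary call (env: SUMMARY_CONTEXT_SIZE)
    summary_context_size=10000,
    # Cap on the token length of a merged entity/relation description (env: SUMMARY_MAX_TOKENS)
    summary_max_tokens=500,
)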

View file

@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100

-### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended) and max tokens sent to LLM
+### Entity and relation summarization configuration
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
 # FORCE_LLM_SUMMARY_ON_MERGE=4
-# SUMMARY_MAX_TOKENS=30000
-### Maximum number of entity extraction attempts for ambiguous content
-# MAX_GLEANING=1
+### Number of tokens to trigger LLM summary on entity/relation merge
+# SUMMARY_MAX_TOKENS=500
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=10000

 ###############################
 ### Concurrency Configuration
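
Note the relationship these three settings must satisfy (enforced as warnings in lightrag.py below): SUMMARY_MAX_TOKENS * FORCE_LLM_SUMMARY_ON_MERGE should fit within SUMMARY_CONTEXT_SIZE, which in turn should not exceed the configured max_total_tokens. The new defaults satisfy this: 500 * 4 = 2000 <= 10000.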

View file

@@ -30,6 +30,7 @@ from lightrag.constants import (
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,

@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
         help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
-        "--max-tokens",
+        "--summary-max-tokens",
         type=int,
         default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
-        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+        help=f"Maximum token size for entity/relation summary (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+    )
+    parser.add_argument(
+        "--summary-context-size",
+        type=int,
+        default=get_env_value(
+            "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
+        ),
+        help=f"LLM summary context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
     )

     # Logging configuration
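
With both flags registered, explicit values can be passed at startup; a hypothetical invocation (assuming the packaged lightrag-server entry point):

lightrag-server --summary-max-tokens 500 --summary-context-size 10000

When the flags are omitted, the SUMMARY_MAX_TOKENS and SUMMARY_CONTEXT_SIZE environment variables supply the defaults, as the get_env_value calls above show.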

View file

@@ -2,7 +2,7 @@
 LightRAG FastAPI Server
 """

-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException
 import asyncio
 import os
 import logging

@@ -472,7 +472,8 @@ def create_app(args):
             ),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs=(

@@ -510,7 +511,8 @@ def create_app(args):
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,

@@ -598,7 +600,7 @@ def create_app(args):
         username = form_data.username
         if auth_handler.accounts.get(username) != form_data.password:
             raise HTTPException(
-                status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
+                status_code=401, detail="Incorrect credentials"
             )

         # Regular user login

@@ -642,7 +644,8 @@ def create_app(args):
         "embedding_binding": args.embedding_binding,
         "embedding_binding_host": args.embedding_binding_host,
         "embedding_model": args.embedding_model,
-        "max_tokens": args.max_tokens,
+        "summary_max_tokens": args.summary_max_tokens,
+        "summary_context_size": args.summary_context_size,
         "kv_storage": args.kv_storage,
         "doc_status_storage": args.doc_status_storage,
         "graph_storage": args.graph_storage,

View file

@@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_model}")
     ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
     ASCIIColors.yellow(f"{args.max_async}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.white(" ├─ Summary Context Size: ", end="")
+    ASCIIColors.yellow(f"{args.summary_context_size}")
     ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
     ASCIIColors.yellow(f"{args.enable_llm_cache}")
     ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")

View file

@@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000

 # Default values for extraction settings
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
-DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
 DEFAULT_MAX_GLEANING = 1
-DEFAULT_SUMMARY_MAX_TOKENS = 30000  # Default maximum token size
+DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
+DEFAULT_SUMMARY_MAX_TOKENS = 500  # Max token size for entity/relation summary
+DEFAULT_SUMMARY_CONTEXT_SIZE = 10000  # Default max context size sent to LLM for description summary

 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"

View file

@@ -34,6 +34,7 @@ from lightrag.constants import (
     DEFAULT_KG_CHUNK_PICK_METHOD,
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_MAX_PARALLEL_INSERT,
     DEFAULT_MAX_GRAPH_NODES,

@@ -285,6 +286,11 @@ class LightRAG:
     summary_max_tokens: int = field(
         default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
     )
-    """Maximum number of tokens allowed per LLM response."""
+    """Maximum tokens allowed for an entity/relation description."""
+
+    summary_context_size: int = field(
+        default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
+    )
+    """Maximum context size (in tokens) sent to the LLM for description summarization."""

     llm_model_max_async: int = field(

@@ -416,6 +422,21 @@ class LightRAG:
         if self.ollama_server_infos is None:
             self.ollama_server_infos = OllamaServerInfos()

+        # Validate summary configuration
+        if self.force_llm_summary_on_merge < 3:
+            logger.warning(
+                f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}"
+            )
+        if (
+            self.summary_max_tokens * self.force_llm_summary_on_merge
+            > self.summary_context_size
+        ):
+            logger.warning(
+                f"summary_context_size must be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}"
+            )
+        if self.summary_context_size > self.max_total_tokens:
+            logger.warning(
+                f"summary_context_size must be less than max_total_tokens, got {self.summary_context_size}"
+            )
+
         # Fix global_config now
         global_config = asdict(self)
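
These checks only log warnings rather than raising, so a misconfigured instance still constructs. A standalone sketch of the same sanity check (a hypothetical helper, not the library's API; 30000 stands in for the default max_total_tokens):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

def check_summary_config(
    summary_max_tokens: int,
    summary_context_size: int,
    force_llm_summary_on_merge: int,
    max_total_tokens: int,
) -> None:
    # Mirrors the three warnings added in __post_init__ above
    if force_llm_summary_on_merge < 3:
        logger.warning("force_llm_summary_on_merge should be at least 3")
    if summary_max_tokens * force_llm_summary_on_merge > summary_context_size:
        logger.warning("summary_context_size is too small for the configured merge batch")
    if summary_context_size > max_total_tokens:
        logger.warning("summary_context_size exceeds max_total_tokens")

check_summary_config(500, 10000, 4, 30000)  # defaults: 500 * 4 = 2000 <= 10000, no warnings
check_summary_config(500, 1500, 4, 30000)   # 500 * 4 = 2000 > 1500, warns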

View file

@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
     """Handle entity relation description summary using map-reduce approach.

     This function summarizes a list of descriptions using a map-reduce strategy:
-    1. If total tokens <= summary_max_tokens, summarize directly
-    2. Otherwise, split descriptions into chunks that fit within token limits
-    3. Summarize each chunk, then recursively process the summaries
-    4. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
+    1. If total tokens < summary_max_tokens and the number of descriptions is below force_llm_summary_on_merge, join directly (no LLM summary needed)
+    2. Otherwise, if total tokens fit within summary_context_size, summarize with the LLM directly
+    3. Otherwise, split descriptions into chunks that fit within summary_context_size
+    4. Summarize each chunk, then recursively process the summaries
+    5. Continue until we get a final summary within token limits or the number of descriptions is below force_llm_summary_on_merge

     Args:

@@ -148,6 +149,7 @@ async def _handle_entity_relation_summary(
     # Get configuration
     tokenizer: Tokenizer = global_config["tokenizer"]
+    summary_context_size = global_config["summary_context_size"]
     summary_max_tokens = global_config["summary_max_tokens"]

     current_list = description_list[:]  # Copy the list to avoid modifying original

@@ -158,11 +160,11 @@ async def _handle_entity_relation_summary(
         total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)

         # If total length is within limits, perform final summarization
-        if (
-            total_tokens <= summary_max_tokens
-            or len(current_list) < force_llm_summary_on_merge
-        ):
-            if len(current_list) < force_llm_summary_on_merge:
+        if total_tokens <= summary_context_size:
+            if (
+                len(current_list) < force_llm_summary_on_merge
+                and total_tokens < summary_max_tokens
+            ):
                 # Already the final result
                 final_description = seperator.join(current_list)
                 return final_description if final_description else ""

@@ -184,9 +186,9 @@ async def _handle_entity_relation_summary(
             desc_tokens = len(tokenizer.encode(desc))

             # If adding current description would exceed limit, finalize current chunk
-            if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
+            if current_tokens + desc_tokens > summary_context_size and current_chunk:
                 chunks.append(current_chunk)
-                current_chunk = [desc]
+                current_chunk = [desc]  # Initial chunk for the next group
                 current_tokens = desc_tokens
             else:
                 current_chunk.append(desc)
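
The splitting step above packs descriptions greedily: it accumulates descriptions until adding the next one would overflow summary_context_size, then starts a new chunk. A standalone sketch of that loop, with a whitespace tokenizer standing in for the library's Tokenizer (toy code, not the actual implementation):

from typing import List

def split_descriptions(descriptions: List[str], summary_context_size: int) -> List[List[str]]:
    # Greedily pack descriptions into chunks whose token totals fit the context size
    def count_tokens(text: str) -> int:
        return len(text.split())  # toy stand-in for tokenizer.encode

    chunks: List[List[str]] = []
    current_chunk: List[str] = []
    current_tokens = 0
    for desc in descriptions:
        desc_tokens = count_tokens(desc)
        # If adding this description would exceed the limit, finalize the current chunk
        if current_tokens + desc_tokens > summary_context_size and current_chunk:
            chunks.append(current_chunk)
            current_chunk = [desc]  # start the next group with this description
            current_tokens = desc_tokens
        else:
            current_chunk.append(desc)
            current_tokens += desc_tokens
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

# Each chunk is then summarized by the LLM, and the resulting summaries are fed
# back through the same routine until a single summary fits within the limits.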

View file

@@ -35,7 +35,6 @@ export type LightragStatus = {
   embedding_binding: string
   embedding_binding_host: string
   embedding_model: string
-  max_tokens: number
   kv_storage: string
   doc_status_storage: string
   graph_storage: string