refac: Rename summary_max_tokens to summary_context_size and add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
yangdx 2025-08-26 01:35:50 +08:00
parent 91767ffcee
commit de2daf6565
10 changed files with 72 additions and 33 deletions

View file

@@ -268,7 +268,8 @@ if __name__ == "__main__":
| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM when generating entity/relation summaries | `30000` (set by env var SUMMARY_MAX_TOKENS) |
| **summary_context_size** | `int` | Maximum tokens sent to the LLM when merging entity/relation summaries | `10000` (set by env var SUMMARY_CONTEXT_SIZE) |
| **summary_max_tokens** | `int` | Maximum token length of a merged entity/relation description | `500` (set by env var SUMMARY_MAX_TOKENS) |
| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed by env var MAX_ASYNC) |
| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed by env var COSINE_THRESHOLD) |
@@ -598,9 +599,9 @@ if __name__ == "__main__":
To improve retrieval quality, documents can be reranked with a more effective relevance-scoring model. The `rerank.py` file provides driver functions for three reranker providers:
* **Cohere / vLLM**: `cohere_rerank`
* **Jina AI**: `jina_rerank`
* **Aliyun**: `ali_rerank`
You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object, which lets LightRAG's query pipeline rerank retrieved text chunks with the injected function. See `examples/rerank_example.py` for detailed usage; a minimal injection sketch follows.
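A minimal sketch, assuming `cohere_rerank` accepts its provider configuration as keyword arguments bound via `functools.partial`; the model name and key handling are illustrative, not taken from this commit, so check `examples/rerank_example.py` for the exact driver signatures.

```python
from functools import partial

from lightrag import LightRAG
from lightrag.rerank import cohere_rerank  # jina_rerank / ali_rerank plug in the same way

# Other required LightRAG parameters (LLM, embeddings) are omitted for brevity.
rag = LightRAG(
    working_dir="./rag_storage",  # illustrative path
    rerank_model_func=partial(cohere_rerank, model="rerank-v3.5", api_key="..."),  # hypothetical kwargs
)
```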

View file

@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM to generate entity/relation summaries | `30000` (configured by env var SUMMARY_MAX_TOKENS) |
| **summary_context_size** | `int` | Maximum tokens sent to the LLM to generate summaries for entity/relation merging | `10000` (configured by env var SUMMARY_CONTEXT_SIZE) |
| **summary_max_tokens** | `int` | Maximum token size for an entity/relation description | `500` (configured by env var SUMMARY_MAX_TOKENS) |
| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value changed by env var MAX_ASYNC) |
| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2 (default value changed by env var COSINE_THRESHOLD) |
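For context, a minimal initialization sketch using both renamed parameters; the working directory and the import path are illustrative assumptions, not part of this commit.

```python
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete  # import path is an assumption

rag = LightRAG(
    working_dir="./rag_storage",  # illustrative path
    llm_model_func=gpt_4o_mini_complete,
    summary_max_tokens=500,      # cap on a single merged entity/relation description
    summary_context_size=10000,  # cap on tokens sent to the LLM per summary call
)
```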

View file

@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Chunk size for document splitting, 500~1500 is recommended
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100
### Entity and relation summarization configuration
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended) and max tokens sent to LLM
### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=4
# SUMMARY_MAX_TOKENS=30000
### Maximum number of entity extraction attempts for ambiguous content
# MAX_GLEANING=1
### Number of tokens to trigger LLM summary on entity/relation merge
# SUMMARY_MAX_TOKENS=500
### Maximum context size sent to LLM for description summary
# SUMMARY_CONTEXT_SIZE=10000
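As the `lightrag.py` hunk later in this commit shows, these variables are resolved with plain `os.getenv` lookups; a minimal sketch of that resolution order (env var over built-in default):

```python
import os

# DEFAULT_* values mirror lightrag/constants.py as changed in this commit.
DEFAULT_SUMMARY_MAX_TOKENS = 500
DEFAULT_SUMMARY_CONTEXT_SIZE = 10000

summary_max_tokens = int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
summary_context_size = int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
```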
###############################
### Concurrency Configuration

View file

@@ -30,6 +30,7 @@ from lightrag.constants import (
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
DEFAULT_MAX_ASYNC,
DEFAULT_SUMMARY_MAX_TOKENS,
DEFAULT_SUMMARY_CONTEXT_SIZE,
DEFAULT_SUMMARY_LANGUAGE,
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
DEFAULT_EMBEDDING_BATCH_NUM,
@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
)
parser.add_argument(
"--max-tokens",
"--summary-max-tokens",
type=int,
default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
help=f"Maximum token size for entity/relation summary(default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
)
parser.add_argument(
"--summary-context-size",
type=int,
default=get_env_value(
"SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
),
help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
)
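`get_env_value` itself is imported from elsewhere in the API package and not shown in this diff; a hypothetical sketch of its behavior (the real helper may differ):

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

def get_env_value(name: str, default: T, value_type: Callable[[str], T] = str) -> T:
    """Hypothetical sketch: a set env var overrides the built-in default,
    and an explicit CLI flag (via argparse above) overrides both."""
    raw = os.getenv(name)
    return value_type(raw) if raw is not None else default
```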
# Logging configuration

View file

@@ -2,7 +2,7 @@
LightRAG FastAPI Server
"""
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi import FastAPI, Depends, HTTPException
import asyncio
import os
import logging
@@ -472,7 +472,8 @@ def create_app(args):
),
llm_model_name=args.llm_model,
llm_model_max_async=args.max_async,
summary_max_tokens=args.max_tokens,
summary_max_tokens=args.summary_max_tokens,
summary_context_size=args.summary_context_size,
chunk_token_size=int(args.chunk_size),
chunk_overlap_token_size=int(args.chunk_overlap_size),
llm_model_kwargs=(
@@ -510,7 +511,8 @@ def create_app(args):
chunk_overlap_token_size=int(args.chunk_overlap_size),
llm_model_name=args.llm_model,
llm_model_max_async=args.max_async,
summary_max_tokens=args.max_tokens,
summary_max_tokens=args.summary_max_tokens,
summary_context_size=args.summary_context_size,
embedding_func=embedding_func,
kv_storage=args.kv_storage,
graph_storage=args.graph_storage,
@@ -598,7 +600,7 @@ def create_app(args):
username = form_data.username
if auth_handler.accounts.get(username) != form_data.password:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
status_code=401, detail="Incorrect credentials"
)
# Regular user login
@@ -642,7 +644,8 @@ def create_app(args):
"embedding_binding": args.embedding_binding,
"embedding_binding_host": args.embedding_binding_host,
"embedding_model": args.embedding_model,
"max_tokens": args.max_tokens,
"summary_max_tokens": args.summary_max_tokens,
"summary_context_size": args.summary_context_size,
"kv_storage": args.kv_storage,
"doc_status_storage": args.doc_status_storage,
"graph_storage": args.graph_storage,

View file

@@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.llm_model}")
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Max Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_tokens}")
ASCIIColors.white(" ├─ Summary Context Size: ", end="")
ASCIIColors.yellow(f"{args.summary_context_size}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")

View file

@@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000
# Default values for extraction settings
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
DEFAULT_MAX_GLEANING = 1
DEFAULT_SUMMARY_MAX_TOKENS = 30000 # Default maximum token size
DEFAULT_SUMMARY_MAX_TOKENS = 500 # Max token size for entity/relation summary
DEFAULT_SUMMARY_CONTEXT_SIZE = 10000 # Default context size sent to LLM for summary
# Separator for graph fields
GRAPH_FIELD_SEP = "<SEP>"

View file

@@ -34,6 +34,7 @@ from lightrag.constants import (
DEFAULT_KG_CHUNK_PICK_METHOD,
DEFAULT_MIN_RERANK_SCORE,
DEFAULT_SUMMARY_MAX_TOKENS,
DEFAULT_SUMMARY_CONTEXT_SIZE,
DEFAULT_MAX_ASYNC,
DEFAULT_MAX_PARALLEL_INSERT,
DEFAULT_MAX_GRAPH_NODES,
@@ -285,6 +286,11 @@ class LightRAG:
summary_max_tokens: int = field(
default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
)
"""Maximum tokens allowed for entity/relation description."""
summary_context_size: int = field(
default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
)
"""Maximum number of tokens allowed per LLM response."""
llm_model_max_async: int = field(
@@ -416,6 +422,21 @@ class LightRAG:
if self.ollama_server_infos is None:
self.ollama_server_infos = OllamaServerInfos()
# Validate config
if self.force_llm_summary_on_merge < 3:
logger.warning(
f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}"
)
if (
self.summary_max_tokens * self.force_llm_summary_on_merge
> self.summary_context_size
):
logger.warning(
f"summary_context_size should be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}"
)
if self.summary_context_size > self.max_total_tokens:
logger.warning(
f"summary_context_size should not exceed max_total_tokens, got {self.summary_context_size}"
)
# Fix global_config now
global_config = asdict(self)
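With this commit's defaults the new checks pass with room to spare; a standalone sketch of the same arithmetic (the `max_total_tokens` comparison is omitted because its default is not shown in this diff):

```python
# Worked example of the validation using defaults from lightrag/constants.py.
summary_max_tokens = 500        # DEFAULT_SUMMARY_MAX_TOKENS
force_llm_summary_on_merge = 4  # DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE
summary_context_size = 10000    # DEFAULT_SUMMARY_CONTEXT_SIZE

assert force_llm_summary_on_merge >= 3
# 500 * 4 = 2000 <= 10000: the worst-case merge input fits in the summary context.
assert summary_max_tokens * force_llm_summary_on_merge <= summary_context_size
```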

View file

@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
"""Handle entity relation description summary using map-reduce approach.
This function summarizes a list of descriptions using a map-reduce strategy:
1. If total tokens <= summary_max_tokens, summarize directly
2. Otherwise, split descriptions into chunks that fit within token limits
3. Summarize each chunk, then recursively process the summaries
4. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
1. If total tokens < summary_max_tokens and len(description_list) < force_llm_summary_on_merge, no need to summarize; join and return
2. If total tokens <= summary_context_size, summarize with LLM directly
3. Otherwise, split descriptions into chunks that fit within summary_context_size
4. Summarize each chunk, then recursively process the summaries
5. Continue until the final summary is within token limits or the number of descriptions is less than force_llm_summary_on_merge
Args:
entity_or_relation_name: Name of the entity or relation being summarized
@@ -148,6 +149,7 @@
# Get configuration
tokenizer: Tokenizer = global_config["tokenizer"]
summary_context_size = global_config["summary_context_size"]
summary_max_tokens = global_config["summary_max_tokens"]
current_list = description_list[:] # Copy the list to avoid modifying original
@@ -158,11 +160,11 @@
total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)
# If total length is within limits, perform final summarization
if (
total_tokens <= summary_max_tokens
or len(current_list) < force_llm_summary_on_merge
):
if len(current_list) < force_llm_summary_on_merge:
if total_tokens <= summary_context_size:
if (
len(current_list) < force_llm_summary_on_merge
and total_tokens < summary_max_tokens
):
# Already the final result
final_description = seperator.join(current_list)
return final_description if final_description else ""
@@ -184,9 +186,9 @@
desc_tokens = len(tokenizer.encode(desc))
# If adding current description would exceed limit, finalize current chunk
if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
if current_tokens + desc_tokens > summary_context_size and current_chunk:
chunks.append(current_chunk)
current_chunk = [desc]
current_chunk = [desc] # Initial chunk for the next group
current_tokens = desc_tokens
else:
current_chunk.append(desc)
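Read in isolation, this grouping step amounts to greedy packing under a token budget; a self-contained sketch, with `encode` standing in for `tokenizer.encode` and `budget` for `summary_context_size` (names are local to the sketch):

```python
from typing import Callable, List

def pack_descriptions(
    descriptions: List[str],
    encode: Callable[[str], List[int]],
    budget: int,
) -> List[List[str]]:
    """Greedily group descriptions so each chunk stays within the token budget."""
    chunks: List[List[str]] = []
    current: List[str] = []
    current_tokens = 0
    for desc in descriptions:
        desc_tokens = len(encode(desc))
        if current and current_tokens + desc_tokens > budget:
            chunks.append(current)  # finalize the full chunk
            current = [desc]        # start the next chunk with this description
            current_tokens = desc_tokens
        else:
            current.append(desc)
            current_tokens += desc_tokens
    if current:
        chunks.append(current)
    return chunks
```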

View file

@@ -35,7 +35,6 @@ export type LightragStatus = {
embedding_binding: string
embedding_binding_host: string
embedding_model: string
max_tokens: number
kv_storage: string
doc_status_storage: string
graph_storage: string