refactor: Rename summary_max_tokens to summary_context_size; add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
parent 91767ffcee
commit de2daf6565

10 changed files with 72 additions and 33 deletions
@@ -268,7 +268,8 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name used for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM when generating entity/relation summaries | `30000` (set by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM when merging entity/relation summaries | `10000` (set by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token length of a merged entity/relation description | `500` (set by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed by env var COSINE_THRESHOLD) |
@@ -598,9 +599,9 @@ if __name__ == "__main__":
To improve retrieval quality, documents can be reranked with a more effective relevance-scoring model. The `rerank.py` file provides driver functions for three reranker providers:

* **Cohere / vLLM**: `cohere_rerank`
* **Jina AI**: `jina_rerank`
* **Aliyun**: `ali_rerank`

You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object; LightRAG's query functions will then use it to rerank retrieved text chunks, as sketched below. See `examples/rerank_example.py` for detailed usage.
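For illustration, a minimal injection sketch. It assumes the drivers are importable from `lightrag.rerank` and accept an `api_key` keyword, and that `rag` is an already-initialized LightRAG object; `examples/rerank_example.py` remains the authoritative reference.

```python
# Sketch: inject a reranker into an existing LightRAG instance `rag`.
# The lightrag.rerank import path and api_key keyword are assumptions.
import asyncio
from functools import partial

from lightrag.rerank import jina_rerank

rag.rerank_model_func = partial(jina_rerank, api_key="YOUR_JINA_API_KEY")

async def demo() -> None:
    # Query results are now reranked by the injected function.
    print(await rag.aquery("Which entities relate to X?"))

asyncio.run(demo())
```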
@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens sent to the LLM to generate entity/relation summaries | `30000` (configured by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to the LLM when generating summaries for entity/relation merging | `10000` (configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size of an entity/relation description | `500` (configured by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, like setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default changed by env var COSINE_THRESHOLD) |
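To make the renamed parameters concrete, here is a minimal initialization sketch. The OpenAI helper imports and the `initialize_storages()` startup step are assumptions based on this repository's examples; adapt them to your own bindings.

```python
# Minimal sketch: constructing LightRAG with the renamed summary parameters.
# The helper imports are assumptions based on this repository's examples.
import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed

async def main() -> None:
    rag = LightRAG(
        working_dir="./rag_storage",
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        summary_max_tokens=500,      # cap on a merged entity/relation description
        summary_context_size=10000,  # cap on context sent to the LLM when merging
    )
    await rag.initialize_storages()

asyncio.run(main())
```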
env.example (11 changes)
@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100

 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
 # FORCE_LLM_SUMMARY_ON_MERGE=4
-# SUMMARY_MAX_TOKENS=30000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1
+### Number of tokens to trigger LLM summary on entity/relation merge
+# SUMMARY_MAX_TOKENS=500
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=10000

 ###############################
 ### Concurrency Configuration
@@ -30,6 +30,7 @@ from lightrag.constants import (
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
         help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
-        "--max-tokens",
+        "--summary-max-tokens",
         type=int,
         default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
-        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+        help=f"Maximum token size for entity/relation summary (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
     )
+    parser.add_argument(
+        "--summary-context-size",
+        type=int,
+        default=get_env_value(
+            "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
+        ),
+        help=f"LLM summary context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
+    )

     # Logging configuration
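For illustration, the renamed flags exercised through `parse_args`. The exact module housing `parse_args` is an assumption here; adjust the import to wherever it lives in this repo.

```python
# Hypothetical harness for the renamed CLI flags; the import path is an
# assumption, parse_args itself is the function shown in the hunk above.
import sys

from lightrag.api.config import parse_args  # import path is an assumption

sys.argv = [
    "lightrag-server",
    "--summary-max-tokens", "500",
    "--summary-context-size", "10000",
]
args = parse_args()
assert args.summary_max_tokens == 500
assert args.summary_context_size == 10000
```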
@@ -2,7 +2,7 @@
 LightRAG FastAPI Server
 """

-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException
 import asyncio
 import os
 import logging
@@ -472,7 +472,8 @@ def create_app(args):
             ),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs=(
@@ -510,7 +511,8 @@ def create_app(args):
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
@@ -598,7 +600,7 @@ def create_app(args):
         username = form_data.username
         if auth_handler.accounts.get(username) != form_data.password:
             raise HTTPException(
-                status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
+                status_code=401, detail="Incorrect credentials"
             )

         # Regular user login
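For context: FastAPI's `status.HTTP_401_UNAUTHORIZED` is just the integer constant `401` re-exported from Starlette, so switching to the literal is behavior-preserving, and it is what lets the now-unused `status` import be dropped in the earlier hunk.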
@@ -642,7 +644,8 @@ def create_app(args):
             "embedding_binding": args.embedding_binding,
             "embedding_binding_host": args.embedding_binding_host,
             "embedding_model": args.embedding_model,
-            "max_tokens": args.max_tokens,
+            "summary_max_tokens": args.summary_max_tokens,
+            "summary_context_size": args.summary_context_size,
             "kv_storage": args.kv_storage,
             "doc_status_storage": args.doc_status_storage,
             "graph_storage": args.graph_storage,
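A hedged way to confirm the health endpoint now reports the renamed keys, assuming the default port and that the config dict shown in the diff is nested under a "configuration" key in the response:

```python
# Check that /health reports summary_max_tokens and summary_context_size.
# Port 9621 and the "configuration" nesting are assumptions.
import json
from urllib.request import urlopen

with urlopen("http://localhost:9621/health") as resp:
    payload = json.load(resp)

config = payload.get("configuration", payload)  # tolerate either shape
print(config["summary_max_tokens"], config["summary_context_size"])
```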
@@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_model}")
     ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
     ASCIIColors.yellow(f"{args.max_async}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.white(" ├─ Summary Context Size: ", end="")
+    ASCIIColors.yellow(f"{args.summary_context_size}")
     ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
     ASCIIColors.yellow(f"{args.enable_llm_cache}")
     ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
@@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000

 # Default values for extraction settings
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
-DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
 DEFAULT_MAX_GLEANING = 1
-DEFAULT_SUMMARY_MAX_TOKENS = 30000  # Default maximum token size
+
+DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
+DEFAULT_SUMMARY_MAX_TOKENS = 500  # Max token size for entity/relation summary
+DEFAULT_SUMMARY_CONTEXT_SIZE = 10000  # Max context size sent to LLM for summarization

 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"
@@ -34,6 +34,7 @@ from lightrag.constants import (
     DEFAULT_KG_CHUNK_PICK_METHOD,
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_MAX_PARALLEL_INSERT,
     DEFAULT_MAX_GRAPH_NODES,
@@ -285,6 +286,11 @@ class LightRAG:
     summary_max_tokens: int = field(
         default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
     )
     """Maximum tokens allowed for entity/relation description."""

+    summary_context_size: int = field(
+        default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
+    )
+    """Maximum context size (in tokens) sent to the LLM when summarizing descriptions."""
+
     llm_model_max_async: int = field(
@@ -416,6 +422,21 @@ class LightRAG:
         if self.ollama_server_infos is None:
             self.ollama_server_infos = OllamaServerInfos()

+        # Validate summary configuration
+        if self.force_llm_summary_on_merge < 3:
+            logger.warning(
+                f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}"
+            )
+        if self.summary_max_tokens * self.force_llm_summary_on_merge > self.summary_context_size:
+            logger.warning(
+                f"summary_context_size should be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}"
+            )
+        if self.summary_context_size > self.max_total_tokens:
+            logger.warning(
+                f"summary_context_size should be less than max_total_tokens, got {self.summary_context_size}"
+            )
+
         # Fix global_config now
         global_config = asdict(self)
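With the new defaults these checks pass comfortably; here is a standalone restatement (the `max_total_tokens` value is an assumption for illustration):

```python
# Restating the three validation constraints with the defaults from
# lightrag/constants.py; max_total_tokens is assumed to be 32000 here.
force_llm_summary_on_merge = 4   # DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE
summary_max_tokens = 500         # DEFAULT_SUMMARY_MAX_TOKENS
summary_context_size = 10000     # DEFAULT_SUMMARY_CONTEXT_SIZE
max_total_tokens = 32000         # assumption for illustration

assert force_llm_summary_on_merge >= 3
# Worst case: force_llm_summary_on_merge descriptions, each at the
# summary_max_tokens cap, must fit in the summary context (2000 <= 10000).
assert summary_max_tokens * force_llm_summary_on_merge <= summary_context_size
# The summary context must in turn fit in the model's total token budget.
assert summary_context_size <= max_total_tokens
```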
@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
     """Handle entity relation description summary using map-reduce approach.

     This function summarizes a list of descriptions using a map-reduce strategy:
-    1. If total tokens <= summary_max_tokens, summarize directly
-    2. Otherwise, split descriptions into chunks that fit within token limits
-    3. Summarize each chunk, then recursively process the summaries
-    4. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge
+    1. If total tokens < summary_max_tokens and len(description_list) < force_llm_summary_on_merge, join the descriptions as-is (no LLM call needed)
+    2. If total tokens <= summary_context_size, summarize with the LLM directly
+    3. Otherwise, split descriptions into chunks that fit within token limits
+    4. Summarize each chunk, then recursively process the summaries
+    5. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge

     Args:
         entity_or_relation_name: Name of the entity or relation being summarized
@@ -148,6 +149,7 @@ async def _handle_entity_relation_summary(

     # Get configuration
     tokenizer: Tokenizer = global_config["tokenizer"]
+    summary_context_size = global_config["summary_context_size"]
     summary_max_tokens = global_config["summary_max_tokens"]

     current_list = description_list[:]  # Copy the list to avoid modifying original
@@ -158,11 +160,11 @@ async def _handle_entity_relation_summary(
         total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)

         # If total length is within limits, perform final summarization
-        if (
-            total_tokens <= summary_max_tokens
-            or len(current_list) < force_llm_summary_on_merge
-        ):
-            if len(current_list) < force_llm_summary_on_merge:
+        if total_tokens <= summary_context_size:
+            if (
+                len(current_list) < force_llm_summary_on_merge
+                and total_tokens < summary_max_tokens
+            ):
                 # Already the final result
                 final_description = seperator.join(current_list)
                 return final_description if final_description else ""
@@ -184,9 +186,9 @@ async def _handle_entity_relation_summary(
             desc_tokens = len(tokenizer.encode(desc))

             # If adding current description would exceed limit, finalize current chunk
-            if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
+            if current_tokens + desc_tokens > summary_context_size and current_chunk:
                 chunks.append(current_chunk)
-                current_chunk = [desc]
+                current_chunk = [desc]  # Initial chunk for next group
                 current_tokens = desc_tokens
             else:
                 current_chunk.append(desc)
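Read together, the operate.py hunks implement the loop below. This is a condensed, synchronous sketch (tokenizer and LLM call stubbed out), not the actual async implementation; parameter names mirror the diff above.

```python
# Condensed sketch of the map-reduce flow in _handle_entity_relation_summary
# after this commit. count_tokens and llm_summarize are stand-ins.
def count_tokens(text: str) -> int:
    return len(text.split())  # stand-in for tokenizer.encode

def llm_summarize(descriptions: list[str]) -> str:
    return " ".join(descriptions)[:200]  # stand-in for the async LLM call

def summarize_descriptions(
    descriptions: list[str],
    summary_max_tokens: int = 500,
    summary_context_size: int = 10000,
    force_llm_summary_on_merge: int = 4,
    separator: str = "<SEP>",
) -> str:
    current = descriptions[:]
    while True:
        total = sum(count_tokens(d) for d in current)
        if total <= summary_context_size:
            # Few, short fragments: joining them is already the final result.
            if len(current) < force_llm_summary_on_merge and total < summary_max_tokens:
                return separator.join(current)
            return llm_summarize(current)  # reduce step: one LLM summary
        # Map step: greedily pack descriptions into context-sized chunks.
        chunks: list[list[str]] = []
        chunk: list[str] = []
        tokens = 0
        for desc in current:
            t = count_tokens(desc)
            if tokens + t > summary_context_size and chunk:
                chunks.append(chunk)
                chunk, tokens = [desc], t  # initial chunk for next group
            else:
                chunk.append(desc)
                tokens += t
        if chunk:
            chunks.append(chunk)
        current = [llm_summarize(c) for c in chunks]  # recurse on summaries
```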
@@ -35,7 +35,6 @@ export type LightragStatus = {
   embedding_binding: string
   embedding_binding_host: string
   embedding_model: string
-  max_tokens: number
   kv_storage: string
   doc_status_storage: string
   graph_storage: string