refac: Rename summary_max_tokens to summary_context_size, comprehensive parameter validation for summary configuration
- Update algorithm logic in operate.py for better token management - Fix health endpoint to use correct parameter names
This commit is contained in:
parent
91767ffcee
commit
de2daf6565
10 changed files with 72 additions and 33 deletions
|
|
@ -268,7 +268,8 @@ if __name__ == "__main__":
|
||||||
| **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
|
| **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
|
||||||
| **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
|
| **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
|
||||||
| **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
|
| **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
|
||||||
| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `30000`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
|
| **summary_context_size** | `int` | 合并实体关系摘要时送给LLM的最大令牌数 | `10000`(由环境变量 SUMMARY_MAX_CONTEXT 设置) |
|
||||||
|
| **summary_max_tokens** | `int` | 合并实体关系描述的最大令牌数长度 | `500`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
|
||||||
| **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
|
| **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
|
||||||
| **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
|
| **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
|
||||||
| **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
|
| **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
|
||||||
|
|
@ -598,9 +599,9 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
为了提高检索质量,可以根据更有效的相关性评分模型对文档进行重排序。`rerank.py`文件提供了三个Reranker提供商的驱动函数:
|
为了提高检索质量,可以根据更有效的相关性评分模型对文档进行重排序。`rerank.py`文件提供了三个Reranker提供商的驱动函数:
|
||||||
|
|
||||||
* **Cohere / vLLM**: `cohere_rerank`
|
* **Cohere / vLLM**: `cohere_rerank`
|
||||||
* **Jina AI**: `jina_rerank`
|
* **Jina AI**: `jina_rerank`
|
||||||
* **Aliyun阿里云**: `ali_rerank`
|
* **Aliyun阿里云**: `ali_rerank`
|
||||||
|
|
||||||
您可以将这些函数之一注入到LightRAG对象的`rerank_model_func`属性中。这将使LightRAG的查询功能能够使用注入的函数对检索到的文本块进行重新排序。有关详细用法,请参阅`examples/rerank_example.py`文件。
|
您可以将这些函数之一注入到LightRAG对象的`rerank_model_func`属性中。这将使LightRAG的查询功能能够使用注入的函数对检索到的文本块进行重新排序。有关详细用法,请参阅`examples/rerank_example.py`文件。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
|
||||||
| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
|
| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
|
||||||
| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
|
| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
|
||||||
| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
|
| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
|
||||||
| **summary_max_tokens** | `int` | Maximum tokens sent to LLM to generate entity relation summaries | `30000`(configured by env var SUMMARY_MAX_TOKENS) |
|
| **summary_context_size** | `int` | Maximum tokens sent to LLM to generate summaries for entity relation merging | `10000`(configured by env var SUMMARY_CONTEXT_SIZE) |
|
||||||
|
| **summary_max_tokens** | `int` | Maximum token size for entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) |
|
||||||
| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
|
| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
|
||||||
| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
|
| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
|
||||||
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
|
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
|
||||||
|
|
|
||||||
11
env.example
11
env.example
|
|
@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
|
||||||
### Chunk size for document splitting, 500~1500 is recommended
|
### Chunk size for document splitting, 500~1500 is recommended
|
||||||
# CHUNK_SIZE=1200
|
# CHUNK_SIZE=1200
|
||||||
# CHUNK_OVERLAP_SIZE=100
|
# CHUNK_OVERLAP_SIZE=100
|
||||||
### Entity and relation summarization configuration
|
|
||||||
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
|
### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
|
||||||
# FORCE_LLM_SUMMARY_ON_MERGE=4
|
# FORCE_LLM_SUMMARY_ON_MERGE=4
|
||||||
# SUMMARY_MAX_TOKENS=30000
|
### Number of tokens to trigger LLM summary on entity/relation merge
|
||||||
### Maximum number of entity extraction attempts for ambiguous content
|
# SUMMARY_MAX_TOKENS=500
|
||||||
# MAX_GLEANING=1
|
### Maximum context size sent to LLM for description summary
|
||||||
|
# SUMMARY_CONTEXT_SIZE=10000
|
||||||
|
|
||||||
###############################
|
###############################
|
||||||
### Concurrency Configuration
|
### Concurrency Configuration
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ from lightrag.constants import (
|
||||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
|
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
|
||||||
DEFAULT_MAX_ASYNC,
|
DEFAULT_MAX_ASYNC,
|
||||||
DEFAULT_SUMMARY_MAX_TOKENS,
|
DEFAULT_SUMMARY_MAX_TOKENS,
|
||||||
|
DEFAULT_SUMMARY_CONTEXT_SIZE,
|
||||||
DEFAULT_SUMMARY_LANGUAGE,
|
DEFAULT_SUMMARY_LANGUAGE,
|
||||||
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
|
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
|
||||||
DEFAULT_EMBEDDING_BATCH_NUM,
|
DEFAULT_EMBEDDING_BATCH_NUM,
|
||||||
|
|
@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
|
||||||
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
|
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--max-tokens",
|
"--summary-max-tokens",
|
||||||
type=int,
|
type=int,
|
||||||
default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
|
default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
|
||||||
help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
|
help=f"Maximum token size for entity/relation summary(default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--summary-context-size",
|
||||||
|
type=int,
|
||||||
|
default=get_env_value(
|
||||||
|
"SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
|
||||||
|
),
|
||||||
|
help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
LightRAG FastAPI Server
|
LightRAG FastAPI Server
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import FastAPI, Depends, HTTPException, status
|
from fastapi import FastAPI, Depends, HTTPException
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -472,7 +472,8 @@ def create_app(args):
|
||||||
),
|
),
|
||||||
llm_model_name=args.llm_model,
|
llm_model_name=args.llm_model,
|
||||||
llm_model_max_async=args.max_async,
|
llm_model_max_async=args.max_async,
|
||||||
summary_max_tokens=args.max_tokens,
|
summary_max_tokens=args.summary_max_tokens,
|
||||||
|
summary_context_size=args.summary_context_size,
|
||||||
chunk_token_size=int(args.chunk_size),
|
chunk_token_size=int(args.chunk_size),
|
||||||
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
||||||
llm_model_kwargs=(
|
llm_model_kwargs=(
|
||||||
|
|
@ -510,7 +511,8 @@ def create_app(args):
|
||||||
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
||||||
llm_model_name=args.llm_model,
|
llm_model_name=args.llm_model,
|
||||||
llm_model_max_async=args.max_async,
|
llm_model_max_async=args.max_async,
|
||||||
summary_max_tokens=args.max_tokens,
|
summary_max_tokens=args.summary_max_tokens,
|
||||||
|
summary_context_size=args.summary_context_size,
|
||||||
embedding_func=embedding_func,
|
embedding_func=embedding_func,
|
||||||
kv_storage=args.kv_storage,
|
kv_storage=args.kv_storage,
|
||||||
graph_storage=args.graph_storage,
|
graph_storage=args.graph_storage,
|
||||||
|
|
@ -598,7 +600,7 @@ def create_app(args):
|
||||||
username = form_data.username
|
username = form_data.username
|
||||||
if auth_handler.accounts.get(username) != form_data.password:
|
if auth_handler.accounts.get(username) != form_data.password:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
|
status_code=401, detail="Incorrect credentials"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Regular user login
|
# Regular user login
|
||||||
|
|
@ -642,7 +644,8 @@ def create_app(args):
|
||||||
"embedding_binding": args.embedding_binding,
|
"embedding_binding": args.embedding_binding,
|
||||||
"embedding_binding_host": args.embedding_binding_host,
|
"embedding_binding_host": args.embedding_binding_host,
|
||||||
"embedding_model": args.embedding_model,
|
"embedding_model": args.embedding_model,
|
||||||
"max_tokens": args.max_tokens,
|
"summary_max_tokens": args.summary_max_tokens,
|
||||||
|
"summary_context_size": args.summary_context_size,
|
||||||
"kv_storage": args.kv_storage,
|
"kv_storage": args.kv_storage,
|
||||||
"doc_status_storage": args.doc_status_storage,
|
"doc_status_storage": args.doc_status_storage,
|
||||||
"graph_storage": args.graph_storage,
|
"graph_storage": args.graph_storage,
|
||||||
|
|
|
||||||
|
|
@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
||||||
ASCIIColors.yellow(f"{args.llm_model}")
|
ASCIIColors.yellow(f"{args.llm_model}")
|
||||||
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
|
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.max_async}")
|
ASCIIColors.yellow(f"{args.max_async}")
|
||||||
ASCIIColors.white(" ├─ Max Tokens: ", end="")
|
ASCIIColors.white(" ├─ Summary Context Size: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.max_tokens}")
|
ASCIIColors.yellow(f"{args.summary_context_size}")
|
||||||
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
|
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.enable_llm_cache}")
|
ASCIIColors.yellow(f"{args.enable_llm_cache}")
|
||||||
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
|
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000
|
||||||
|
|
||||||
# Default values for extraction settings
|
# Default values for extraction settings
|
||||||
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
|
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
|
||||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
|
|
||||||
DEFAULT_MAX_GLEANING = 1
|
DEFAULT_MAX_GLEANING = 1
|
||||||
DEFAULT_SUMMARY_MAX_TOKENS = 30000 # Default maximum token size
|
|
||||||
|
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
|
||||||
|
DEFAULT_SUMMARY_MAX_TOKENS = 500 # Max token size for entity/relation summary
|
||||||
|
DEFAULT_SUMMARY_CONTEXT_SIZE = 10000 # Default maximum token size
|
||||||
|
|
||||||
# Separator for graph fields
|
# Separator for graph fields
|
||||||
GRAPH_FIELD_SEP = "<SEP>"
|
GRAPH_FIELD_SEP = "<SEP>"
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ from lightrag.constants import (
|
||||||
DEFAULT_KG_CHUNK_PICK_METHOD,
|
DEFAULT_KG_CHUNK_PICK_METHOD,
|
||||||
DEFAULT_MIN_RERANK_SCORE,
|
DEFAULT_MIN_RERANK_SCORE,
|
||||||
DEFAULT_SUMMARY_MAX_TOKENS,
|
DEFAULT_SUMMARY_MAX_TOKENS,
|
||||||
|
DEFAULT_SUMMARY_CONTEXT_SIZE,
|
||||||
DEFAULT_MAX_ASYNC,
|
DEFAULT_MAX_ASYNC,
|
||||||
DEFAULT_MAX_PARALLEL_INSERT,
|
DEFAULT_MAX_PARALLEL_INSERT,
|
||||||
DEFAULT_MAX_GRAPH_NODES,
|
DEFAULT_MAX_GRAPH_NODES,
|
||||||
|
|
@ -285,6 +286,11 @@ class LightRAG:
|
||||||
summary_max_tokens: int = field(
|
summary_max_tokens: int = field(
|
||||||
default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
|
default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
|
||||||
)
|
)
|
||||||
|
"""Maximum tokens allowed for entity/relation description."""
|
||||||
|
|
||||||
|
summary_context_size: int = field(
|
||||||
|
default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE))
|
||||||
|
)
|
||||||
"""Maximum number of tokens allowed per LLM response."""
|
"""Maximum number of tokens allowed per LLM response."""
|
||||||
|
|
||||||
llm_model_max_async: int = field(
|
llm_model_max_async: int = field(
|
||||||
|
|
@ -416,6 +422,21 @@ class LightRAG:
|
||||||
if self.ollama_server_infos is None:
|
if self.ollama_server_infos is None:
|
||||||
self.ollama_server_infos = OllamaServerInfos()
|
self.ollama_server_infos = OllamaServerInfos()
|
||||||
|
|
||||||
|
|
||||||
|
# Validate config
|
||||||
|
if self.force_llm_summary_on_merge < 3:
|
||||||
|
logger.warning(
|
||||||
|
f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}"
|
||||||
|
)
|
||||||
|
if self.summary_max_tokens * self.force_llm_summary_on_merge > self.summary_context_size:
|
||||||
|
logger.warning(
|
||||||
|
f"summary_context_size must be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}"
|
||||||
|
)
|
||||||
|
if self.summary_context_size > self.max_total_tokens:
|
||||||
|
logger.warning(
|
||||||
|
f"summary_context_size must be less than max_total_tokens, got {self.summary_context_size}"
|
||||||
|
)
|
||||||
|
|
||||||
# Fix global_config now
|
# Fix global_config now
|
||||||
global_config = asdict(self)
|
global_config = asdict(self)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
|
||||||
"""Handle entity relation description summary using map-reduce approach.
|
"""Handle entity relation description summary using map-reduce approach.
|
||||||
|
|
||||||
This function summarizes a list of descriptions using a map-reduce strategy:
|
This function summarizes a list of descriptions using a map-reduce strategy:
|
||||||
1. If total tokens <= summary_max_tokens, summarize directly
|
1. If total tokens < summary_context_size and len(description_list) < force_llm_summary_on_merge, no need to summarize
|
||||||
2. Otherwise, split descriptions into chunks that fit within token limits
|
2. If total tokens < summary_max_tokens, summarize with LLM directly
|
||||||
3. Summarize each chunk, then recursively process the summaries
|
3. Otherwise, split descriptions into chunks that fit within token limits
|
||||||
4. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
|
4. Summarize each chunk, then recursively process the summaries
|
||||||
|
5. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
entity_or_relation_name: Name of the entity or relation being summarized
|
entity_or_relation_name: Name of the entity or relation being summarized
|
||||||
|
|
@ -148,6 +149,7 @@ async def _handle_entity_relation_summary(
|
||||||
|
|
||||||
# Get configuration
|
# Get configuration
|
||||||
tokenizer: Tokenizer = global_config["tokenizer"]
|
tokenizer: Tokenizer = global_config["tokenizer"]
|
||||||
|
summary_context_size = global_config["summary_context_size"]
|
||||||
summary_max_tokens = global_config["summary_max_tokens"]
|
summary_max_tokens = global_config["summary_max_tokens"]
|
||||||
|
|
||||||
current_list = description_list[:] # Copy the list to avoid modifying original
|
current_list = description_list[:] # Copy the list to avoid modifying original
|
||||||
|
|
@ -158,11 +160,11 @@ async def _handle_entity_relation_summary(
|
||||||
total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)
|
total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)
|
||||||
|
|
||||||
# If total length is within limits, perform final summarization
|
# If total length is within limits, perform final summarization
|
||||||
if (
|
if total_tokens <= summary_context_size:
|
||||||
total_tokens <= summary_max_tokens
|
if (
|
||||||
or len(current_list) < force_llm_summary_on_merge
|
len(current_list) < force_llm_summary_on_merge
|
||||||
):
|
and total_tokens < summary_max_tokens
|
||||||
if len(current_list) < force_llm_summary_on_merge:
|
):
|
||||||
# Already the final result
|
# Already the final result
|
||||||
final_description = seperator.join(current_list)
|
final_description = seperator.join(current_list)
|
||||||
return final_description if final_description else ""
|
return final_description if final_description else ""
|
||||||
|
|
@ -184,9 +186,9 @@ async def _handle_entity_relation_summary(
|
||||||
desc_tokens = len(tokenizer.encode(desc))
|
desc_tokens = len(tokenizer.encode(desc))
|
||||||
|
|
||||||
# If adding current description would exceed limit, finalize current chunk
|
# If adding current description would exceed limit, finalize current chunk
|
||||||
if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
|
if current_tokens + desc_tokens > summary_context_size and current_chunk:
|
||||||
chunks.append(current_chunk)
|
chunks.append(current_chunk)
|
||||||
current_chunk = [desc]
|
current_chunk = [desc] # Initial chunk for next group
|
||||||
current_tokens = desc_tokens
|
current_tokens = desc_tokens
|
||||||
else:
|
else:
|
||||||
current_chunk.append(desc)
|
current_chunk.append(desc)
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,6 @@ export type LightragStatus = {
|
||||||
embedding_binding: string
|
embedding_binding: string
|
||||||
embedding_binding_host: string
|
embedding_binding_host: string
|
||||||
embedding_model: string
|
embedding_model: string
|
||||||
max_tokens: number
|
|
||||||
kv_storage: string
|
kv_storage: string
|
||||||
doc_status_storage: string
|
doc_status_storage: string
|
||||||
graph_storage: string
|
graph_storage: string
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue