From 598eecd06d9e91df8d0e77ef0637d895dbf97564 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 28 Jul 2025 00:49:08 +0800
Subject: [PATCH] Refactor: Rename llm_model_max_token_size to summary_max_tokens

This commit renames the parameter 'llm_model_max_token_size' to
'summary_max_tokens' for better clarity, as it specifically controls the
token limit for entity relation summaries.
---
 README-zh.md                                            | 2 +-
 README.md                                               | 2 +-
 examples/lightrag_ollama_demo.py                        | 2 +-
 examples/unofficial-sample/lightrag_cloudflare_demo.py  | 2 +-
 .../lightrag_openai_neo4j_milvus_redis_demo.py          | 2 +-
 lightrag/api/config.py                                  | 8 ++++----
 lightrag/api/lightrag_server.py                         | 4 ++--
 lightrag/constants.py                                   | 2 +-
 lightrag/lightrag.py                                    | 5 ++++-
 lightrag/operate.py                                     | 2 +-
 10 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index 1b5ce4dc..3326d95b 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -265,7 +265,7 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
 | **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `32000`(默认值由环境变量MAX_TOKENS更改) |
+| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `32000`(默认值由环境变量MAX_TOKENS更改) |
 | **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
 | **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
 | **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
diff --git a/README.md b/README.md
index c68665c1..1af28d3b 100644
--- a/README.md
+++ b/README.md
@@ -272,7 +272,7 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum tokens send to LLM to generate entity relation summaries | `32000`(default value changed by env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum tokens sent to LLM to generate entity relation summaries | `32000`(default value changed by env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index b012f685..18fcc790 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -87,7 +87,7 @@ async def initialize_rag():
         working_dir=WORKING_DIR,
         llm_model_func=ollama_model_complete,
         llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
-        llm_model_max_token_size=8192,
+        summary_max_tokens=8192,
         llm_model_kwargs={
             "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
             "options": {"num_ctx": 8192},
diff --git a/examples/unofficial-sample/lightrag_cloudflare_demo.py b/examples/unofficial-sample/lightrag_cloudflare_demo.py
index b5dbabd1..b53e6714 100644
--- a/examples/unofficial-sample/lightrag_cloudflare_demo.py
+++ b/examples/unofficial-sample/lightrag_cloudflare_demo.py
@@ -211,7 +211,7 @@ async def initialize_rag():
         max_parallel_insert=2,
         llm_model_func=cloudflare_worker.query,
         llm_model_name=os.getenv("LLM_MODEL", LLM_MODEL),
-        llm_model_max_token_size=4080,
+        summary_max_tokens=4080,
         embedding_func=EmbeddingFunc(
             embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
             max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "2048")),
diff --git a/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py b/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
index 38d89218..00845796 100644
--- a/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
+++ b/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
@@ -56,7 +56,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        llm_model_max_token_size=32768,
+        summary_max_tokens=10000,
         embedding_func=embedding_func,
         chunk_token_size=512,
         chunk_overlap_token_size=256,
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 0294657b..e56b1749 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -22,7 +22,7 @@ from lightrag.constants import (
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
-    DEFAULT_MAX_TOKENS,
+    DEFAULT_SUMMARY_MAX_TOKENS,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -118,13 +118,13 @@ def parse_args() -> argparse.Namespace:
         "--max-async",
         type=int,
         default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
-        help="Maximum async operations (default: from env or 4)",
+        help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
         "--max-tokens",
         type=int,
-        default=get_env_value("MAX_TOKENS", DEFAULT_MAX_TOKENS, int),
-        help="Maximum token size (default: from env or 32000)",
+        default=get_env_value("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
+        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
     )

     # Logging configuration
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 2d05ac97..2ae2d87b 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -347,7 +347,7 @@ def create_app(args):
             else openai_alike_model_complete,
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            llm_model_max_token_size=args.max_tokens,
+            summary_max_tokens=args.max_tokens,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs={
@@ -386,7 +386,7 @@ def create_app(args):
             },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            llm_model_max_token_size=args.max_tokens,
+            summary_max_tokens=args.max_tokens,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
diff --git a/lightrag/constants.py b/lightrag/constants.py
index b02bcd6e..7fc47e21 100644
--- a/lightrag/constants.py
+++ b/lightrag/constants.py
@@ -14,7 +14,7 @@ DEFAULT_TIMEOUT = 150
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
 DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
 DEFAULT_MAX_GLEANING = 1
-DEFAULT_MAX_TOKENS = 10000  # Default maximum token size
+DEFAULT_SUMMARY_MAX_TOKENS = 10000  # Default maximum token size

 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 9df35175..c930dc66 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -32,6 +32,7 @@ from lightrag.constants import (
     DEFAULT_COSINE_THRESHOLD,
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
+    DEFAULT_SUMMARY_MAX_TOKENS,
 )

 from lightrag.utils import get_env_value
@@ -270,7 +271,9 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""

-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
+    summary_max_tokens: int = field(
+        default=int(os.getenv("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
+    )
     """Maximum number of tokens allowed per LLM response."""

     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 769a9f1e..78836176 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -125,7 +125,7 @@ async def _handle_entity_relation_summary(
     use_llm_func = partial(use_llm_func, _priority=8)

     tokenizer: Tokenizer = global_config["tokenizer"]
-    llm_max_tokens = global_config["llm_model_max_token_size"]
+    llm_max_tokens = global_config["summary_max_tokens"]

     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]
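
For callers updating to the renamed parameter, a minimal initialization sketch (illustrative only; it assumes the gpt_4o_mini_complete / openai_embed helpers and the storage and pipeline-status setup documented in the project README, with OPENAI_API_KEY set in the environment):

import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status


async def initialize_rag():
    rag = LightRAG(
        working_dir="./rag_storage",
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        # Renamed from llm_model_max_token_size: caps the tokens sent to the
        # LLM when generating entity/relation summaries; still overridable
        # via the MAX_TOKENS environment variable (defaults to 10000).
        summary_max_tokens=10000,
    )
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag


if __name__ == "__main__":
    rag = asyncio.run(initialize_rag())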