From 3c530b21b60de2f7584d86e88ed7351319625598 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 31 Jul 2025 13:00:09 +0800
Subject: [PATCH] Update README

---
 README-zh.md |  2 +-
 README.md    |  2 +-
 env.example  | 10 +++++++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index d0de20da..40dfc777 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -320,7 +320,7 @@ class QueryParam:
     max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
     """Maximum number of tokens allocated for relationship context in unified token control system."""
 
-    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
     """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
 
     hl_keywords: list[str] = field(default_factory=list)
diff --git a/README.md b/README.md
index 664a852e..a6002e6a 100644
--- a/README.md
+++ b/README.md
@@ -327,7 +327,7 @@ class QueryParam:
     max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
     """Maximum number of tokens allocated for relationship context in unified token control system."""
 
-    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
     """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
 
     conversation_history: list[dict[str, str]] = field(default_factory=list)
diff --git a/env.example b/env.example
index 0b863c9b..699fee01 100644
--- a/env.example
+++ b/env.example
@@ -48,15 +48,19 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
 
-########################
+######################################################################################
 ### Query Configuration
-########################
+###
+### How to control the context length sent to the LLM:
+### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
+### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
+######################################################################################
 # LLM responde cache for query (Not valid for streaming response)
 ENABLE_LLM_CACHE=true
 # COSINE_THRESHOLD=0.2
 ### Number of entities or relations retrieved from KG
 # TOP_K=40
-### Maxmium number or chunks plan to send to LLM
+### Maximum number of chunks for naive vector search
 # CHUNK_TOP_K=10
 ### control the actual enties send to LLM
 # MAX_ENTITY_TOKENS=10000
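
Note on the token budget introduced above: the relationship between the three knobs can be sanity-checked with a short Python sketch. This is a minimal illustration of the arithmetic in the new env.example comments, assuming the defaults from this patch; `chunk_token_budget` is a hypothetical helper for illustration, not part of the LightRAG API.

```python
import os

# Defaults mirror this patch (env.example / QueryParam).
MAX_ENTITY_TOKENS = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
MAX_RELATION_TOKENS = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
MAX_TOTAL_TOKENS = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))

# The entity and relation caps must leave headroom for chunk context:
assert MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS


def chunk_token_budget(actual_entity_tokens: int, actual_relation_tokens: int) -> int:
    """Hypothetical helper mirroring the env.example comment:

    Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
    """
    return MAX_TOTAL_TOKENS - actual_entity_tokens - actual_relation_tokens


# Example: if entity context uses its full 10000-token cap and relation
# context only needs 8000 tokens, 12000 tokens remain for chunks.
print(chunk_token_budget(10000, 8000))  # 30000 - 10000 - 8000 = 12000
```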
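The same budget can also be set per query through QueryParam rather than environment variables. A minimal usage sketch, assuming `QueryParam` is importable from the `lightrag` package as in the project's README examples, and that a `max_entity_tokens` field (mirroring MAX_ENTITY_TOKENS) exists alongside the fields shown in the excerpt above:

```python
from lightrag import QueryParam  # import path as in LightRAG's README examples

# Per-query override of the unified token budget; the constraint from
# env.example still applies: entity cap + relation cap < total budget.
param = QueryParam(
    max_entity_tokens=8000,    # assumed field, mirrors MAX_ENTITY_TOKENS
    max_relation_tokens=8000,  # field shown in the README excerpt above
    max_total_tokens=24000,    # leaves >= 8000 tokens for chunks + system prompt
)
```

Lowering `max_total_tokens` per query can be useful when the target model's context window is smaller than the global default.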