From 3c530b21b60de2f7584d86e88ed7351319625598 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 31 Jul 2025 13:00:09 +0800
Subject: [PATCH] Update README

---
 README-zh.md |  2 +-
 README.md    |  2 +-
 env.example  | 10 +++++++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index d0de20da..40dfc777 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -320,7 +320,7 @@ class QueryParam:
     max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
     """Maximum number of tokens allocated for relationship context in unified token control system."""
 
-    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
     """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
 
     hl_keywords: list[str] = field(default_factory=list)
diff --git a/README.md b/README.md
index 664a852e..a6002e6a 100644
--- a/README.md
+++ b/README.md
@@ -327,7 +327,7 @@ class QueryParam:
     max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
     """Maximum number of tokens allocated for relationship context in unified token control system."""
 
-    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
     """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
 
     conversation_history: list[dict[str, str]] = field(default_factory=list)
diff --git a/env.example b/env.example
index 0b863c9b..699fee01 100644
--- a/env.example
+++ b/env.example
@@ -48,15 +48,19 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
 
-########################
+######################################################################################
 ### Query Configuration
-########################
+###
+### How to control the context length sent to the LLM:
+### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
+### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
+######################################################################################
 # LLM responde cache for query (Not valid for streaming response)
 ENABLE_LLM_CACHE=true
 # COSINE_THRESHOLD=0.2
 ### Number of entities or relations retrieved from KG
 # TOP_K=40
-### Maxmium number or chunks plan to send to LLM
+### Maximum number of chunks for naive vector search
 # CHUNK_TOP_K=10
 ### control the actual enties send to LLM
 # MAX_ENTITY_TOKENS=10000
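
Note on the token budget introduced above: the relationship between the three knobs can be sanity-checked with a short Python sketch. This is a minimal illustration of the arithmetic in the new env.example comments, assuming the defaults from this patch; `chunk_token_budget` is a hypothetical helper for illustration, not part of the LightRAG API.

```python
import os

# Defaults mirror this patch (env.example / QueryParam).
MAX_ENTITY_TOKENS = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
MAX_RELATION_TOKENS = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
MAX_TOTAL_TOKENS = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))

# The entity and relation caps must leave headroom for chunk context:
assert MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS


def chunk_token_budget(actual_entity_tokens: int, actual_relation_tokens: int) -> int:
    """Hypothetical helper mirroring the env.example comment:

    Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
    """
    return MAX_TOTAL_TOKENS - actual_entity_tokens - actual_relation_tokens


# Example: if entity context uses its full 10000-token cap and relation
# context only needs 8000 tokens, 12000 tokens remain for chunks.
print(chunk_token_budget(10000, 8000))  # 30000 - 10000 - 8000 = 12000
```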
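The same budget can also be set per query through QueryParam rather than environment variables. A minimal usage sketch, assuming `QueryParam` is importable from the `lightrag` package as in the project's README examples, and that a `max_entity_tokens` field (mirroring MAX_ENTITY_TOKENS) exists alongside the fields shown in the excerpt above:

```python
from lightrag import QueryParam  # import path as in LightRAG's README examples

# Per-query override of the unified token budget; the constraint from
# env.example still applies: entity cap + relation cap < total budget.
param = QueryParam(
    max_entity_tokens=8000,    # assumed field, mirrors MAX_ENTITY_TOKENS
    max_relation_tokens=8000,  # field shown in the README excerpt above
    max_total_tokens=24000,    # leaves >= 8000 tokens for chunks + system prompt
)
```

Lowering `max_total_tokens` per query can be useful when the target model's context window is smaller than the global default.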