diff --git a/README-zh.md b/README-zh.md
index d6aef2c8..02d7295c 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -304,16 +304,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
-    """Maximum number of tokens allowed for each retrieved text chunk."""
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""

-    max_token_for_global_context: int = int(
-        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""

-    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
-    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     hl_keywords: list[str] = field(default_factory=list)
     """List of high-level keywords to prioritize in retrieval."""
diff --git a/README.md b/README.md
index 5fb6149b..a04eb1d7 100644
--- a/README.md
+++ b/README.md
@@ -311,16 +311,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
-    """Maximum number of tokens allowed for each retrieved text chunk."""
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""

-    max_token_for_global_context: int = int(
-        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""

-    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
-    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     conversation_history: list[dict[str, str]] = field(default_factory=list)
     """Stores past conversation history to maintain context.
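The three per-scope limits are replaced by two scope caps plus one overall budget. A minimal sketch of the new knobs in use, assuming an already-initialized LightRAG instance named `rag` (storage and LLM wiring omitted):

```python
# Minimal sketch of the unified token controls; assumes `rag` is an already
# initialized LightRAG instance (storage and LLM setup omitted).
from lightrag import QueryParam

param = QueryParam(
    mode="hybrid",
    max_entity_tokens=10000,    # cap on the serialized entity context
    max_relation_tokens=10000,  # cap on the serialized relationship context
    max_total_tokens=32000,     # whole-context budget, system prompt included
)

# Text chunks receive whatever remains of max_total_tokens after the
# entity/relation context and system prompt overhead are subtracted.
print(rag.query("What are the top themes in this story?", param=param))
```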
diff --git a/env.example b/env.example
index 4515fe34..ec5d0bad 100644
--- a/env.example
+++ b/env.example
@@ -50,9 +50,12 @@ OLLAMA_EMULATING_MODEL_TAG=latest

 ### RAG Query Configuration
 # HISTORY_TURNS=3
-# MAX_TOKEN_TEXT_CHUNK=6000
-# MAX_TOKEN_RELATION_DESC=4000
-# MAX_TOKEN_ENTITY_DESC=4000
+
+### These parameters provide more precise control over total token usage
+# MAX_ENTITY_TOKENS=10000
+# MAX_RELATION_TOKENS=10000
+# MAX_TOTAL_TOKENS=32000
+
 # COSINE_THRESHOLD=0.2
 ### Number of entities or relations to retrieve from KG
 # TOP_K=60
diff --git a/lightrag/api/routers/query_routes.py b/lightrag/api/routers/query_routes.py
index 0a0c6227..4005b599 100644
--- a/lightrag/api/routers/query_routes.py
+++ b/lightrag/api/routers/query_routes.py
@@ -61,22 +61,22 @@ class QueryRequest(BaseModel):
         description="Number of text chunks to keep after reranking.",
     )

-    max_token_for_text_unit: Optional[int] = Field(
-        gt=1,
+    max_entity_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allowed for each retrieved text chunk.",
+        description="Maximum number of tokens allocated for entity context in unified token control system.",
+        ge=1,
     )

-    max_token_for_global_context: Optional[int] = Field(
-        gt=1,
+    max_relation_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allocated for relationship descriptions in global retrieval.",
+        description="Maximum number of tokens allocated for relationship context in unified token control system.",
+        ge=1,
     )

-    max_token_for_local_context: Optional[int] = Field(
-        gt=1,
+    max_total_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
+        description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
+        ge=1,
     )

     conversation_history: Optional[List[Dict[str, Any]]] = Field(
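The same renames surface in the REST API. A hedged example of calling the `/query` route with the new fields; the host and port below are assumptions (adjust to your deployment), and any omitted field falls back to the server-side `QueryParam` default:

```python
# Hypothetical request against a running lightrag-server; the URL and port
# are assumptions -- adjust them to your deployment.
import json
from urllib import request

payload = {
    "query": "What are the top themes in this corpus?",
    "mode": "hybrid",
    "max_entity_tokens": 10000,
    "max_relation_tokens": 10000,
    "max_total_tokens": 32000,  # omitted fields use server-side defaults
}

req = request.Request(
    "http://localhost:9621/query",  # assumed default host/port
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))
```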
""" - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000")) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") - ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000")) + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000")) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" hl_keywords: list[str] = field(default_factory=list) """List of high-level keywords to prioritize in retrieval.""" diff --git a/lightrag/operate.py b/lightrag/operate.py index be4499ab..668d42a9 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1569,7 +1569,9 @@ async def kg_query( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) response = await use_model_func( query, @@ -1692,7 +1694,9 @@ async def extract_keywords_only( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(kw_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})" + ) # 5. 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index be4499ab..668d42a9 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1569,7 +1569,9 @@ async def kg_query(
     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,
@@ -1692,7 +1694,9 @@ async def extract_keywords_only(
     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(kw_prompt))
-    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens"
+    )

     # 5. Call the LLM for keyword extraction
     if param.model_func:
@@ -1864,7 +1868,7 @@ async def _build_query_context(

     # Combine entities and relations contexts
     entities_context = process_combine_contexts(
-        hl_entities_context, ll_entities_context
+        ll_entities_context, hl_entities_context
     )
     relations_context = process_combine_contexts(
         hl_relations_context, ll_relations_context
     )
@@ -1894,6 +1898,163 @@ async def _build_query_context(
         f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
     )

+    # Unified token control system - Apply precise token limits to entities and relations
+    tokenizer = text_chunks_db.global_config.get("tokenizer")
+    if tokenizer:
+        # Get new token limits from query_param (with fallback to global_config)
+        max_entity_tokens = getattr(
+            query_param,
+            "max_entity_tokens",
+            text_chunks_db.global_config.get("MAX_ENTITY_TOKENS", 8000),
+        )
+        max_relation_tokens = getattr(
+            query_param,
+            "max_relation_tokens",
+            text_chunks_db.global_config.get("MAX_RELATION_TOKENS", 6000),
+        )
+        max_total_tokens = getattr(
+            query_param,
+            "max_total_tokens",
+            text_chunks_db.global_config.get("MAX_TOTAL_TOKENS", 32000),
+        )
+
+        # Truncate entities based on complete JSON serialization
+        if entities_context:
+            original_entity_count = len(entities_context)
+            entities_context = truncate_list_by_token_size(
+                entities_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_entity_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(entities_context) < original_entity_count:
+                logger.debug(
+                    f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})"
+                )
+
+        # Truncate relations based on complete JSON serialization
+        if relations_context:
+            original_relation_count = len(relations_context)
+            relations_context = truncate_list_by_token_size(
+                relations_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_relation_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(relations_context) < original_relation_count:
+                logger.debug(
+                    f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})"
+                )
+
+        # Calculate dynamic token limit for text chunks
+        entities_str = json.dumps(entities_context, ensure_ascii=False)
+        relations_str = json.dumps(relations_context, ensure_ascii=False)
+
+        # Calculate base context tokens (entities + relations + template)
+        kg_context_template = """-----Entities(KG)-----
+
+```json
+{entities_str}
+```
+
+-----Relationships(KG)-----
+
+```json
+{relations_str}
+```
+
+-----Document Chunks(DC)-----
+
+```json
+[]
+```
+
+"""
+        kg_context = kg_context_template.format(
+            entities_str=entities_str, relations_str=relations_str
+        )
+        kg_context_tokens = len(tokenizer.encode(kg_context))
+
+        # Calculate actual system prompt overhead dynamically
+        # 1. Calculate conversation history tokens
+        history_context = ""
+        if query_param.conversation_history:
+            history_context = get_conversation_turns(
+                query_param.conversation_history, query_param.history_turns
+            )
+        history_tokens = (
+            len(tokenizer.encode(history_context)) if history_context else 0
+        )
+
+        # 2. Calculate system prompt template tokens (excluding context_data)
+        user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+        response_type = (
+            query_param.response_type
+            if query_param.response_type
+            else "Multiple Paragraphs"
+        )
+
+        # Get the system prompt template from PROMPTS
+        sys_prompt_template = text_chunks_db.global_config.get(
+            "system_prompt_template", PROMPTS["rag_response"]
+        )
+
+        # Create a sample system prompt with placeholders filled (excluding context_data)
+        sample_sys_prompt = sys_prompt_template.format(
+            history=history_context,
+            context_data="",  # Empty for overhead calculation
+            response_type=response_type,
+            user_prompt=user_prompt,
+        )
+        sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+        # Total system prompt overhead = template + query tokens
+        query_tokens = len(tokenizer.encode(query))
+        sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+        buffer_tokens = 100  # Safety buffer
+
+        # Calculate available tokens for text chunks
+        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
+        available_chunk_tokens = max_total_tokens - used_tokens
+
+        logger.debug(
+            f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        )
+
+        # Re-process chunks with dynamic token limit
+        if text_units_context:
+            # Build a minimal copy of the chunk dicts for re-truncation
+            temp_chunks = [
+                {"content": chunk["content"], "file_path": chunk["file_path"]}
+                for chunk in text_units_context
+            ]
+
+            # Apply token truncation to chunks using the dynamic limit
+            truncated_chunks = await process_chunks_unified(
+                query=query,
+                chunks=temp_chunks,
+                query_param=query_param,
+                global_config=text_chunks_db.global_config,
+                source_type="mixed",
+                chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
+            )
+
+            # Rebuild text_units_context with truncated chunks
+            text_units_context = []
+            for i, chunk in enumerate(truncated_chunks):
+                text_units_context.append(
+                    {
+                        "id": i + 1,
+                        "content": chunk["content"],
+                        "file_path": chunk.get("file_path", "unknown_source"),
+                    }
+                )
+
+            logger.debug(
+                f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
+            )
+
     # not necessary to use LLM to generate a response
     if not entities_context and not relations_context:
         return None
@@ -1982,18 +2143,6 @@ async def _get_node_data(
         knowledge_graph_inst,
     )

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     logger.info(
         f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
@@ -2199,20 +2348,9 @@ async def _find_most_related_edges_from_entities(
             }
             all_edges_data.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
     all_edges_data = sorted(
         all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    all_edges_data = truncate_list_by_token_size(
-        all_edges_data,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )
-
-    logger.debug(
-        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
-    )

     return all_edges_data

@@ -2269,16 +2407,9 @@ async def _get_edge_data(
             }
             edge_datas.append(combined)

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    edge_datas = truncate_list_by_token_size(
-        edge_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )

     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
             edge_datas,
@@ -2388,18 +2519,6 @@ async def _find_most_related_entities_from_relationships(
             combined = {**node, "entity_name": entity_name, "rank": degree}
             node_datas.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     return node_datas

@@ -2491,13 +2610,64 @@ async def naive_query(
     if chunks is None or len(chunks) == 0:
         return PROMPTS["fail_response"]

-    # Process chunks using unified processing
+    # Calculate dynamic token limit for chunks
+    # Get token limits from query_param (with fallback to global_config)
+    max_total_tokens = getattr(
+        query_param, "max_total_tokens", global_config.get("MAX_TOTAL_TOKENS", 32000)
+    )
+
+    # Calculate conversation history tokens
+    history_context = ""
+    if query_param.conversation_history:
+        history_context = get_conversation_turns(
+            query_param.conversation_history, query_param.history_turns
+        )
+    history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
+
+    # Calculate system prompt template tokens (excluding content_data)
+    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    response_type = (
+        query_param.response_type
+        if query_param.response_type
+        else "Multiple Paragraphs"
+    )
+
+    # Use the provided system prompt or default
+    sys_prompt_template = (
+        system_prompt if system_prompt else PROMPTS["naive_rag_response"]
+    )
+
+    # Create a sample system prompt with empty content_data to calculate overhead
+    sample_sys_prompt = sys_prompt_template.format(
+        content_data="",  # Empty for overhead calculation
+        response_type=response_type,
+        history=history_context,
+        user_prompt=user_prompt,
+    )
+    sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+    # Total system prompt overhead = template + query tokens
+    query_tokens = len(tokenizer.encode(query))
+    sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+    buffer_tokens = 100  # Safety buffer
+
+    # Calculate available tokens for chunks
+    used_tokens = sys_prompt_overhead + buffer_tokens
+    available_chunk_tokens = max_total_tokens - used_tokens
+
+    logger.debug(
+        f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+    )
+
+    # Process chunks using unified processing with dynamic token limit
     processed_chunks = await process_chunks_unified(
         query=query,
         chunks=chunks,
         query_param=query_param,
         global_config=global_config,
         source_type="vector",
+        chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
     )

     logger.info(f"Final context: {len(processed_chunks)} chunks")
@@ -2548,7 +2718,9 @@ async def naive_query(
         return sys_prompt

     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,
@@ -2672,7 +2844,9 @@ async def kg_query_with_keywords(
     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     # 6. Generate response
     response = await use_model_func(
@@ -2849,6 +3023,7 @@ async def process_chunks_unified(
     query_param: QueryParam,
     global_config: dict,
     source_type: str = "mixed",
+    chunk_token_limit: int | None = None,  # Dynamic token limit for chunks
 ) -> list[dict]:
     """
     Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.
@@ -2859,6 +3034,7 @@ async def process_chunks_unified(
         query_param: Query parameters containing configuration
         global_config: Global configuration dictionary
         source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
+        chunk_token_limit: Dynamic token limit for chunks (if None, uses default)

     Returns:
         Processed and filtered list of text chunks
@@ -2901,16 +3077,25 @@ async def process_chunks_unified(
     # 4. Token-based final truncation
     tokenizer = global_config.get("tokenizer")
     if tokenizer and unique_chunks:
+        # Set default chunk_token_limit if not provided
+        if chunk_token_limit is None:
+            # Get default from query_param or global_config
+            chunk_token_limit = getattr(
+                query_param,
+                "max_total_tokens",
+                global_config.get("MAX_TOTAL_TOKENS", 32000),
+            )
+
         original_count = len(unique_chunks)
         unique_chunks = truncate_list_by_token_size(
             unique_chunks,
             key=lambda x: x.get("content", ""),
-            max_token_size=query_param.max_token_for_text_unit,
+            max_token_size=chunk_token_limit,
             tokenizer=tokenizer,
         )
         logger.debug(
             f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
-            f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})"
+            f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
         )

     return unique_chunks
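The key behavioral change in `_build_query_context` (and analogously in `naive_query`, minus the KG share) is that the chunk budget is now derived rather than configured: entities and relations are capped first, the system-prompt overhead is measured against the real template, and chunks get the remainder. Illustrative arithmetic only, with made-up token counts standing in for what a real tokenizer would report:

```python
# Illustrative arithmetic only -- the token counts below are made-up
# stand-ins for values a real tokenizer would produce.
MAX_TOTAL_TOKENS = 32000

kg_context_tokens = 14500   # entities + relations JSON after their own caps
sys_prompt_overhead = 2200  # prompt template (history, response_type, user_prompt) + query
buffer_tokens = 100         # fixed safety margin

available_chunk_tokens = MAX_TOTAL_TOKENS - (
    kg_context_tokens + sys_prompt_overhead + buffer_tokens
)
print(available_chunk_tokens)  # 15200 tokens left for document chunks
```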
diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts
index 24b299aa..77601ec7 100644
--- a/lightrag_webui/src/api/lightrag.ts
+++ b/lightrag_webui/src/api/lightrag.ts
@@ -90,12 +90,16 @@ export type QueryRequest = {
   stream?: boolean
   /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
   top_k?: number
-  /** Maximum number of tokens allowed for each retrieved text chunk. */
-  max_token_for_text_unit?: number
-  /** Maximum number of tokens allocated for relationship descriptions in global retrieval. */
-  max_token_for_global_context?: number
-  /** Maximum number of tokens allocated for entity descriptions in local retrieval. */
-  max_token_for_local_context?: number
+  /** Maximum number of text chunks to retrieve and process. */
+  chunk_top_k?: number
+  /** Number of text chunks to keep after reranking. */
+  chunk_rerank_top_k?: number
+  /** Maximum number of tokens allocated for entity context in unified token control system. */
+  max_entity_tokens?: number
+  /** Maximum number of tokens allocated for relationship context in unified token control system. */
+  max_relation_tokens?: number
+  /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */
+  max_total_tokens?: number
   /**
    * Stores past conversation history to maintain context.
    * Format: [{"role": "user/assistant", "content": "message"}].
diff --git a/lightrag_webui/src/components/retrieval/QuerySettings.tsx b/lightrag_webui/src/components/retrieval/QuerySettings.tsx
index 735a4190..b21f5b11 100644
--- a/lightrag_webui/src/components/retrieval/QuerySettings.tsx
+++ b/lightrag_webui/src/components/retrieval/QuerySettings.tsx
@@ -132,30 +132,81 @@ export default function QuerySettings() {
+            {/* Chunk Top K */}
+            <>
+              <TooltipProvider>
+                <Tooltip>
+                  <TooltipTrigger asChild>
+                    <label htmlFor="chunk_top_k" className="ml-1 cursor-help">
+                      {t('retrievePanel.querySettings.chunkTopK')}
+                    </label>
+                  </TooltipTrigger>
+                  <TooltipContent side="left">
+                    <p>{t('retrievePanel.querySettings.chunkTopKTooltip')}</p>
+                  </TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+              <NumberInput
+                id="chunk_top_k"
+                stepper={1}
+                value={querySettings.chunk_top_k}
+                onValueChange={(v) => handleChange('chunk_top_k', v)}
+                min={1}
+                placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')}
+              />
+            </>
+
+            {/* Chunk Rerank Top K */}
+            <>
+              <TooltipProvider>
+                <Tooltip>
+                  <TooltipTrigger asChild>
+                    <label htmlFor="chunk_rerank_top_k" className="ml-1 cursor-help">
+                      {t('retrievePanel.querySettings.chunkRerankTopK')}
+                    </label>
+                  </TooltipTrigger>
+                  <TooltipContent side="left">
+                    <p>{t('retrievePanel.querySettings.chunkRerankTopKTooltip')}</p>
+                  </TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+              <NumberInput
+                id="chunk_rerank_top_k"
+                stepper={1}
+                value={querySettings.chunk_rerank_top_k}
+                onValueChange={(v) => handleChange('chunk_rerank_top_k', v)}
+                min={1}
+                placeholder={t('retrievePanel.querySettings.chunkRerankTopKPlaceholder')}
+              />
+            </>
+
             {/* Max Tokens */}
             <>
               <>
                 <TooltipProvider>
                   <Tooltip>
                     <TooltipTrigger asChild>
-                      <label htmlFor="max_token_for_text_unit" className="ml-1 cursor-help">
-                        {t('retrievePanel.querySettings.maxTokensTextUnit')}
+                      <label htmlFor="max_entity_tokens" className="ml-1 cursor-help">
+                        {t('retrievePanel.querySettings.maxEntityTokens')}
                       </label>
                     </TooltipTrigger>
                     <TooltipContent side="left">
-                      <p>{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}</p>
+                      <p>{t('retrievePanel.querySettings.maxEntityTokensTooltip')}</p>
                     </TooltipContent>
                   </Tooltip>
                 </TooltipProvider>
-                {/* Removed sr-only label */}
                 <NumberInput
-                  id="max_token_for_text_unit"
-                  value={querySettings.max_token_for_text_unit}
-                  onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
+                  id="max_entity_tokens"
+                  value={querySettings.max_entity_tokens}
+                  onValueChange={(v) => handleChange('max_entity_tokens', v)}
                   min={1}
-                  placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
+                  placeholder={t('retrievePanel.querySettings.maxEntityTokens')}
                 />
               </>
@@ -164,24 +215,23 @@ export default function QuerySettings() {
               <>
                 <TooltipProvider>
                   <Tooltip>
                     <TooltipTrigger asChild>
-                      <label htmlFor="max_token_for_global_context" className="ml-1 cursor-help">
-                        {t('retrievePanel.querySettings.maxTokensGlobalContext')}
+                      <label htmlFor="max_relation_tokens" className="ml-1 cursor-help">
+                        {t('retrievePanel.querySettings.maxRelationTokens')}
                       </label>
                     </TooltipTrigger>
                     <TooltipContent side="left">
-                      <p>{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}</p>
+                      <p>{t('retrievePanel.querySettings.maxRelationTokensTooltip')}</p>
                     </TooltipContent>
                   </Tooltip>
                 </TooltipProvider>
-                {/* Removed sr-only label */}
                 <NumberInput
-                  id="max_token_for_global_context"
-                  value={querySettings.max_token_for_global_context}
-                  onValueChange={(v) => handleChange('max_token_for_global_context', v)}
+                  id="max_relation_tokens"
+                  value={querySettings.max_relation_tokens}
+                  onValueChange={(v) => handleChange('max_relation_tokens', v)}
                   min={1}
-                  placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
+                  placeholder={t('retrievePanel.querySettings.maxRelationTokens')}
                 />
               </>
@@ -190,24 +240,23 @@ export default function QuerySettings() {
              <>
                 <TooltipProvider>
                   <Tooltip>
                     <TooltipTrigger asChild>
-                      <label htmlFor="max_token_for_local_context" className="ml-1 cursor-help">
-                        {t('retrievePanel.querySettings.maxTokensLocalContext')}
+                      <label htmlFor="max_total_tokens" className="ml-1 cursor-help">
+                        {t('retrievePanel.querySettings.maxTotalTokens')}
                       </label>
                     </TooltipTrigger>
                     <TooltipContent side="left">
-                      <p>{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}</p>
+                      <p>{t('retrievePanel.querySettings.maxTotalTokensTooltip')}</p>
                     </TooltipContent>
                   </Tooltip>
                 </TooltipProvider>
-                {/* Removed sr-only label */}
                 <NumberInput
-                  id="max_token_for_local_context"
-                  value={querySettings.max_token_for_local_context}
-                  onValueChange={(v) => handleChange('max_token_for_local_context', v)}
+                  id="max_total_tokens"
+                  stepper={1000}
+                  value={querySettings.max_total_tokens}
+                  onValueChange={(v) => handleChange('max_total_tokens', v)}
                   min={1}
-                  placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
+                  placeholder={t('retrievePanel.querySettings.maxTotalTokens')}
                 />
               </>
diff --git a/lightrag_webui/src/locales/ar.json b/lightrag_webui/src/locales/ar.json
index 7751c05c..44a2d65d 100644
--- a/lightrag_webui/src/locales/ar.json
+++ b/lightrag_webui/src/locales/ar.json
@@ -359,16 +359,22 @@
       "singleParagraph": "فقرة واحدة",
       "bulletPoints": "نقاط نقطية"
     },
-    "topK": "أعلى K نتائج",
-    "topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'",
-    "topKPlaceholder": "عدد النتائج",
-    "maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص",
-    "maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع",
-    "maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي",
-    "maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي",
-    "maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي",
-    "maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي",
-    "historyTurns": "دورات التاريخ",
+    "topK": "أعلى K",
+    "topKTooltip": "عدد العناصر العلوية للاسترداد. يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.",
+    "topKPlaceholder": "أدخل قيمة أعلى k",
+    "chunkTopK": "أعلى K للقطع",
+    "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.",
+    "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع",
+    "chunkRerankTopK": "أعلى K لإعادة الترتيب",
+    "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.",
+    "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب",
+    "maxEntityTokens": "الحد الأقصى لرموز الكيان",
+    "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز",
+    "maxRelationTokens": "الحد الأقصى لرموز العلاقة",
+    "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز",
+    "maxTotalTokens": "إجمالي الحد الأقصى للرموز",
+    "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)",
+    "historyTurns": "أدوار التاريخ",
     "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
     "historyTurnsPlaceholder": "عدد دورات التاريخ",
     "onlyNeedContext": "تحتاج فقط إلى السياق",
diff --git a/lightrag_webui/src/locales/en.json b/lightrag_webui/src/locales/en.json
index 726ae5b9..219e1fcc 100644
--- a/lightrag_webui/src/locales/en.json
+++ b/lightrag_webui/src/locales/en.json
@@ -359,15 +359,21 @@
       "singleParagraph": "Single Paragraph",
       "bulletPoints": "Bullet Points"
     },
-    "topK": "Top K Results",
-    "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
-    "topKPlaceholder": "Number of results",
-    "maxTokensTextUnit": "Max Tokens for Text Unit",
-    "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
-    "maxTokensGlobalContext": "Max Tokens for Global Context",
-    "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
-    "maxTokensLocalContext": "Max Tokens for Local Context",
-    "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
+    "topK": "Top K",
+    "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
+    "topKPlaceholder": "Enter top k value",
+    "chunkTopK": "Chunk Top K",
+    "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.",
+    "chunkTopKPlaceholder": "Enter chunk top k value",
+    "chunkRerankTopK": "Chunk Rerank Top K",
+    "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.",
+    "chunkRerankTopKPlaceholder": "Enter rerank top k value",
+    "maxEntityTokens": "Max Entity Tokens",
+    "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system",
+    "maxRelationTokens": "Max Relation Tokens",
+    "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system",
+    "maxTotalTokens": "Max Total Tokens",
+    "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)",
     "historyTurns": "History Turns",
     "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
     "historyTurnsPlaceholder": "Number of history turns",
diff --git a/lightrag_webui/src/locales/fr.json b/lightrag_webui/src/locales/fr.json
index 96a85fac..75ca6732 100644
--- a/lightrag_webui/src/locales/fr.json
+++ b/lightrag_webui/src/locales/fr.json
@@ -359,15 +359,21 @@
       "singleParagraph": "Paragraphe unique",
       "bulletPoints": "Points à puces"
     },
-    "topK": "Top K résultats",
-    "topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'",
-    "topKPlaceholder": "Nombre de résultats",
-    "maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte",
-    "maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré",
-    "maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global",
-    "maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale",
-    "maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local",
-    "maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale",
+    "topK": "Top K",
+    "topKTooltip": "Nombre d'éléments principaux à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'.",
+    "topKPlaceholder": "Entrez la valeur top k",
+    "chunkTopK": "Top K des Chunks",
+    "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.",
+    "chunkTopKPlaceholder": "Entrez la valeur top k des chunks",
+    "chunkRerankTopK": "Top K du Reclassement",
+    "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.",
+    "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement",
+    "maxEntityTokens": "Limite de jetons d'entité",
+    "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié",
+    "maxRelationTokens": "Limite de jetons de relation",
+    "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié",
+    "maxTotalTokens": "Limite totale de jetons",
+    "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)",
     "historyTurns": "Tours d'historique",
     "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
     "historyTurnsPlaceholder": "Nombre de tours d'historique",
diff --git a/lightrag_webui/src/locales/zh.json b/lightrag_webui/src/locales/zh.json
index fa72ba1f..4482317f 100644
--- a/lightrag_webui/src/locales/zh.json
+++ b/lightrag_webui/src/locales/zh.json
@@ -359,15 +359,21 @@
       "singleParagraph": "单段落",
       "bulletPoints": "要点"
     },
-    "topK": "Top K结果",
-    "topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系",
-    "topKPlaceholder": "结果数量",
-    "maxTokensTextUnit": "文本单元最大令牌数",
-    "maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数",
-    "maxTokensGlobalContext": "全局上下文最大令牌数",
-    "maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数",
-    "maxTokensLocalContext": "本地上下文最大令牌数",
-    "maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数",
+    "topK": "Top K",
+    "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。",
+    "topKPlaceholder": "输入top k值",
+    "chunkTopK": "文本块 Top K",
+    "chunkTopKTooltip": "检索和处理的最大文本块数量。",
+    "chunkTopKPlaceholder": "输入文本块top k值",
+    "chunkRerankTopK": "重排序 Top K",
+    "chunkRerankTopKTooltip": "重排序后保留的文本块数量。",
+    "chunkRerankTopKPlaceholder": "输入重排序top k值",
+    "maxEntityTokens": "实体令牌数上限",
+    "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数",
+    "maxRelationTokens": "关系令牌数上限",
+    "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数",
+    "maxTotalTokens": "总令牌数上限",
+    "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)",
     "historyTurns": "历史轮次",
     "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
     "historyTurnsPlaceholder": "历史轮次数",
diff --git a/lightrag_webui/src/locales/zh_TW.json b/lightrag_webui/src/locales/zh_TW.json
index 40480fd5..cf571e62 100644
--- a/lightrag_webui/src/locales/zh_TW.json
+++ b/lightrag_webui/src/locales/zh_TW.json
@@ -300,7 +300,7 @@
         "file_path": "來源",
         "keywords": "Keys",
         "weight": "權重"
-      }
+      }
     },
     "edge": {
       "title": "關係",
@@ -359,15 +359,15 @@
       "singleParagraph": "單段落",
       "bulletPoints": "重點"
     },
-    "topK": "Top K結果",
-    "topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係",
-    "topKPlaceholder": "結果數量",
-    "maxTokensTextUnit": "文字單元最大權杖數",
-    "maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數",
-    "maxTokensGlobalContext": "全域上下文最大權杖數",
-    "maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數",
-    "maxTokensLocalContext": "本地上下文最大權杖數",
-    "maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數",
+    "topK": "Top K",
+    "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。",
+    "topKPlaceholder": "輸入top k值",
+    "chunkTopK": "文字區塊 Top K",
+    "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。",
+    "chunkTopKPlaceholder": "輸入文字區塊top k值",
+    "chunkRerankTopK": "重新排序 Top K",
+    "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。",
+    "chunkRerankTopKPlaceholder": "輸入重新排序top k值",
     "historyTurns": "歷史輪次",
     "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
     "historyTurnsPlaceholder": "歷史輪次數",
@@ -379,7 +379,13 @@
     "streamResponseTooltip": "如果為True,啟用即時串流輸出回應",
     "userPrompt": "用戶提示詞",
     "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。",
-    "userPromptPlaceholder": "輸入自定義提示詞(可選)"
+    "userPromptPlaceholder": "輸入自定義提示詞(可選)",
+    "maxEntityTokens": "實體令牌數上限",
+    "maxEntityTokensTooltip": "統一令牌控制系統中分配給實體上下文的最大令牌數",
+    "maxRelationTokens": "關係令牌數上限",
+    "maxRelationTokensTooltip": "統一令牌控制系統中分配給關係上下文的最大令牌數",
+    "maxTotalTokens": "總令牌數上限",
+    "maxTotalTokensTooltip": "整個查詢上下文的最大總令牌預算(實體+關係+文檔塊+系統提示)"
   }
 },
 "apiSite": {
diff --git a/lightrag_webui/src/stores/settings.ts b/lightrag_webui/src/stores/settings.ts
index 5942ddca..223432e4 100644
--- a/lightrag_webui/src/stores/settings.ts
+++ b/lightrag_webui/src/stores/settings.ts
@@ -111,9 +111,11 @@ const useSettingsStoreBase = create()(
         mode: 'global',
         response_type: 'Multiple Paragraphs',
         top_k: 10,
-        max_token_for_text_unit: 6000,
-        max_token_for_global_context: 4000,
-        max_token_for_local_context: 4000,
+        chunk_top_k: 5,
+        chunk_rerank_top_k: 5,
+        max_entity_tokens: 10000,
+        max_relation_tokens: 10000,
+        max_total_tokens: 32000,
         only_need_context: false,
         only_need_prompt: false,
         stream: true,
@@ -192,7 +194,7 @@ const useSettingsStoreBase = create()(
     {
       name: 'settings-storage',
      storage: createJSONStorage(() => localStorage),
-      version: 14,
+      version: 15,
      migrate: (state: any, version: number) => {
        if (version < 2) {
          state.showEdgeLabel = false
@@ -215,9 +217,9 @@ const useSettingsStoreBase = create()(
             mode: 'global',
             response_type: 'Multiple Paragraphs',
             top_k: 10,
-            max_token_for_text_unit: 4000,
-            max_token_for_global_context: 4000,
-            max_token_for_local_context: 4000,
+            max_entity_tokens: 10000,
+            max_relation_tokens: 10000,
+            max_total_tokens: 32000,
             only_need_context: false,
             only_need_prompt: false,
             stream: true,
@@ -260,6 +262,26 @@ const useSettingsStoreBase = create()(
           // Add backendMaxGraphNodes field for older versions
           state.backendMaxGraphNodes = null
         }
+        if (version < 15) {
+          // Fully reset querySettings for the unified token control system
+          state.querySettings = {
+            mode: 'global',
+            response_type: 'Multiple Paragraphs',
+            top_k: 10,
+            chunk_top_k: 5,
+            chunk_rerank_top_k: 5,
+            max_entity_tokens: 10000,
+            max_relation_tokens: 10000,
+            max_total_tokens: 32000,
+            only_need_context: false,
+            only_need_prompt: false,
+            stream: true,
+            history_turns: 3,
+            hl_keywords: [],
+            ll_keywords: [],
+            user_prompt: ''
+          }
+        }
         return state
       }
     }