Update token limit

zrguo 2025-07-14 15:53:48 +08:00
parent ba0cffd853
commit ef2115d437
14 changed files with 459 additions and 172 deletions

View file

@@ -304,16 +304,14 @@ class QueryParam:
If None, keeps all chunks returned from initial retrieval.
"""
max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
"""Maximum number of tokens allowed for each retrieved text chunk."""
max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
"""Maximum number of tokens allocated for entity context in unified token control system."""
max_token_for_global_context: int = int(
os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
)
"""Maximum number of tokens allocated for relationship descriptions in global retrieval."""
max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
"""Maximum number of tokens allocated for relationship context in unified token control system."""
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
"""Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
hl_keywords: list[str] = field(default_factory=list)
"""List of high-level keywords to prioritize in retrieval."""

View file

@@ -311,16 +311,14 @@ class QueryParam:
If None, keeps all chunks returned from initial retrieval.
"""
max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
"""Maximum number of tokens allowed for each retrieved text chunk."""
max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
"""Maximum number of tokens allocated for entity context in unified token control system."""
max_token_for_global_context: int = int(
os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
)
"""Maximum number of tokens allocated for relationship descriptions in global retrieval."""
max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
"""Maximum number of tokens allocated for relationship context in unified token control system."""
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
"""Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
conversation_history: list[dict[str, str]] = field(default_factory=list)
"""Stores past conversation history to maintain context.

View file

@@ -50,9 +50,12 @@ OLLAMA_EMULATING_MODEL_TAG=latest
### RAG Query Configuration
# HISTORY_TURNS=3
# MAX_TOKEN_TEXT_CHUNK=6000
# MAX_TOKEN_RELATION_DESC=4000
# MAX_TOKEN_ENTITY_DESC=4000
### These parameters provide more precise control over total token usage
# MAX_ENTITY_TOKENS=10000
# MAX_RELATION_TOKENS=10000
# MAX_TOTAL_TOKENS=32000
# COSINE_THRESHOLD=0.2
### Number of entities or relations to retrieve from KG
# TOP_K=60
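
Because these defaults are read with os.getenv inside the dataclass field definitions, they are captured once at import time; a hedged sketch of loading them early enough (python-dotenv here is an assumption, any mechanism that sets the variables before the import works):

```python
# Sketch: the MAX_*_TOKENS defaults are evaluated when lightrag is imported,
# so the environment must be populated before that import happens.
from dotenv import load_dotenv  # assumption: python-dotenv is installed

load_dotenv()  # exposes MAX_ENTITY_TOKENS / MAX_RELATION_TOKENS / MAX_TOTAL_TOKENS

from lightrag import QueryParam  # defaults now reflect the .env values

print(QueryParam().max_total_tokens)  # 32000 unless overridden in .env
```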

View file

@@ -61,22 +61,22 @@ class QueryRequest(BaseModel):
description="Number of text chunks to keep after reranking.",
)
max_token_for_text_unit: Optional[int] = Field(
gt=1,
max_entity_tokens: Optional[int] = Field(
default=None,
description="Maximum number of tokens allowed for each retrieved text chunk.",
description="Maximum number of tokens allocated for entity context in unified token control system.",
ge=1,
)
max_token_for_global_context: Optional[int] = Field(
gt=1,
max_relation_tokens: Optional[int] = Field(
default=None,
description="Maximum number of tokens allocated for relationship descriptions in global retrieval.",
description="Maximum number of tokens allocated for relationship context in unified token control system.",
ge=1,
)
max_token_for_local_context: Optional[int] = Field(
gt=1,
max_total_tokens: Optional[int] = Field(
default=None,
description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
ge=1,
)
conversation_history: Optional[List[Dict[str, Any]]] = Field(
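
For reference, a hedged example of exercising the renamed request fields over HTTP; the endpoint path and port are assumptions based on the default LightRAG API server, not part of this diff:

```python
# Hedged sketch: POSTing a query with the new unified token fields.
import requests

resp = requests.post(
    "http://localhost:9621/query",    # assumed default server address
    json={
        "query": "What changed in the token budget?",
        "mode": "hybrid",
        "max_entity_tokens": 10000,   # replaces max_token_for_local_context
        "max_relation_tokens": 10000, # replaces max_token_for_global_context
        "max_total_tokens": 32000,    # new overall context budget
    },
    timeout=60,
)
print(resp.json())
```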

View file

@@ -70,16 +70,14 @@ class QueryParam:
If None, keeps all chunks returned from initial retrieval.
"""
max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000"))
"""Maximum number of tokens allowed for each retrieved text chunk."""
max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
"""Maximum number of tokens allocated for entity context in unified token control system."""
max_token_for_global_context: int = int(
os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
)
"""Maximum number of tokens allocated for relationship descriptions in global retrieval."""
max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
"""Maximum number of tokens allocated for relationship context in unified token control system."""
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
"""Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
hl_keywords: list[str] = field(default_factory=list)
"""List of high-level keywords to prioritize in retrieval."""

View file

@@ -1569,7 +1569,9 @@ async def kg_query(
tokenizer: Tokenizer = global_config["tokenizer"]
len_of_prompts = len(tokenizer.encode(query + sys_prompt))
logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
logger.debug(
f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
)
response = await use_model_func(
query,
@@ -1692,7 +1694,9 @@ async def extract_keywords_only(
tokenizer: Tokenizer = global_config["tokenizer"]
len_of_prompts = len(tokenizer.encode(kw_prompt))
logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
logger.debug(
f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
)
# 5. Call the LLM for keyword extraction
if param.model_func:
@@ -1864,7 +1868,7 @@ async def _build_query_context(
# Combine entities and relations contexts
entities_context = process_combine_contexts(
hl_entities_context, ll_entities_context
ll_entities_context, hl_entities_context
)
relations_context = process_combine_contexts(
hl_relations_context, ll_relations_context
@@ -1894,6 +1898,163 @@ async def _build_query_context(
f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
)
# Unified token control system - Apply precise token limits to entities and relations
tokenizer = text_chunks_db.global_config.get("tokenizer")
if tokenizer:
# Get new token limits from query_param (with fallback to global_config)
max_entity_tokens = getattr(
query_param,
"max_entity_tokens",
text_chunks_db.global_config.get("MAX_ENTITY_TOKENS", 8000),
)
max_relation_tokens = getattr(
query_param,
"max_relation_tokens",
text_chunks_db.global_config.get("MAX_RELATION_TOKENS", 6000),
)
max_total_tokens = getattr(
query_param,
"max_total_tokens",
text_chunks_db.global_config.get("MAX_TOTAL_TOKENS", 32000),
)
# Truncate entities based on complete JSON serialization
if entities_context:
original_entity_count = len(entities_context)
entities_context = truncate_list_by_token_size(
entities_context,
key=lambda x: json.dumps(x, ensure_ascii=False),
max_token_size=max_entity_tokens,
tokenizer=tokenizer,
)
if len(entities_context) < original_entity_count:
logger.debug(
f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})"
)
# Truncate relations based on complete JSON serialization
if relations_context:
original_relation_count = len(relations_context)
relations_context = truncate_list_by_token_size(
relations_context,
key=lambda x: json.dumps(x, ensure_ascii=False),
max_token_size=max_relation_tokens,
tokenizer=tokenizer,
)
if len(relations_context) < original_relation_count:
logger.debug(
f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})"
)
# Calculate dynamic token limit for text chunks
entities_str = json.dumps(entities_context, ensure_ascii=False)
relations_str = json.dumps(relations_context, ensure_ascii=False)
# Calculate base context tokens (entities + relations + template)
kg_context_template = """-----Entities(KG)-----
```json
{entities_str}
```
-----Relationships(KG)-----
```json
{relations_str}
```
-----Document Chunks(DC)-----
```json
[]
```
"""
kg_context = kg_context_template.format(
entities_str=entities_str, relations_str=relations_str
)
kg_context_tokens = len(tokenizer.encode(kg_context))
# Calculate actual system prompt overhead dynamically
# 1. Calculate conversation history tokens
history_context = ""
if query_param.conversation_history:
history_context = get_conversation_turns(
query_param.conversation_history, query_param.history_turns
)
history_tokens = (
len(tokenizer.encode(history_context)) if history_context else 0
)
# 2. Calculate system prompt template tokens (excluding context_data)
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Get the system prompt template from PROMPTS
sys_prompt_template = text_chunks_db.global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
# Create a sample system prompt with placeholders filled (excluding context_data)
sample_sys_prompt = sys_prompt_template.format(
history=history_context,
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100  # Safety buffer
# Calculate available tokens for text chunks
used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
logger.debug(
f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Re-process chunks with dynamic token limit
if text_units_context:
# Create a temporary query_param copy with adjusted chunk token limit
temp_chunks = [
{"content": chunk["content"], "file_path": chunk["file_path"]}
for chunk in text_units_context
]
# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
chunks=temp_chunks,
query_param=query_param,
global_config=text_chunks_db.global_config,
source_type="mixed",
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
# Rebuild text_units_context with truncated chunks
text_units_context = []
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"id": i + 1,
"content": chunk["content"],
"file_path": chunk.get("file_path", "unknown_source"),
}
)
logger.debug(
f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
)
# not necessary to use LLM to generate a response
if not entities_context and not relations_context:
return None
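
The chunk budget above reduces to a single subtraction; a worked sketch with illustrative numbers (none of these counts come from the diff):

```python
# Illustrative arithmetic for the dynamic chunk budget computed above.
max_total_tokens = 32_000

kg_context_tokens = 14_500   # entities + relations after their own truncation
sys_prompt_overhead = 1_200  # prompt template + history + query tokens (example)
buffer_tokens = 100          # fixed safety margin

available_chunk_tokens = max_total_tokens - (
    kg_context_tokens + sys_prompt_overhead + buffer_tokens
)
print(available_chunk_tokens)  # 16200 tokens left for document chunks
```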
@@ -1982,18 +2143,6 @@ async def _get_node_data(
knowledge_graph_inst,
)
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
len_node_datas = len(node_datas)
node_datas = truncate_list_by_token_size(
node_datas,
key=lambda x: x["description"] if x["description"] is not None else "",
max_token_size=query_param.max_token_for_local_context,
tokenizer=tokenizer,
)
logger.debug(
f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
)
logger.info(
f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
)
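
This removal (and the similar ones below) deletes per-function calls to truncate_list_by_token_size, whose job the unified system now does once in _build_query_context. A simplified sketch of the helper's assumed behavior, keeping the longest prefix whose encoded keys fit the budget (see lightrag.utils for the real implementation):

```python
# Simplified sketch of truncate_list_by_token_size as used in this diff.
# Assumption: the real helper in lightrag.utils keeps the longest prefix of
# `items` whose encoded keys fit within max_token_size.
def truncate_list_by_token_size(items, key, max_token_size, tokenizer):
    total = 0
    for i, item in enumerate(items):
        total += len(tokenizer.encode(key(item)))
        if total > max_token_size:
            return items[:i]
    return items
```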
@@ -2199,20 +2348,9 @@ async def _find_most_related_edges_from_entities(
}
all_edges_data.append(combined)
tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
all_edges_data = sorted(
all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
)
all_edges_data = truncate_list_by_token_size(
all_edges_data,
key=lambda x: x["description"] if x["description"] is not None else "",
max_token_size=query_param.max_token_for_global_context,
tokenizer=tokenizer,
)
logger.debug(
f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
)
return all_edges_data
@@ -2269,16 +2407,9 @@ async def _get_edge_data(
}
edge_datas.append(combined)
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
edge_datas = sorted(
edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
)
edge_datas = truncate_list_by_token_size(
edge_datas,
key=lambda x: x["description"] if x["description"] is not None else "",
max_token_size=query_param.max_token_for_global_context,
tokenizer=tokenizer,
)
use_entities, use_text_units = await asyncio.gather(
_find_most_related_entities_from_relationships(
edge_datas,
@@ -2388,18 +2519,6 @@ async def _find_most_related_entities_from_relationships(
combined = {**node, "entity_name": entity_name, "rank": degree}
node_datas.append(combined)
tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
len_node_datas = len(node_datas)
node_datas = truncate_list_by_token_size(
node_datas,
key=lambda x: x["description"] if x["description"] is not None else "",
max_token_size=query_param.max_token_for_local_context,
tokenizer=tokenizer,
)
logger.debug(
f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
)
return node_datas
@@ -2491,13 +2610,64 @@ async def naive_query(
if chunks is None or len(chunks) == 0:
return PROMPTS["fail_response"]
# Process chunks using unified processing
# Calculate dynamic token limit for chunks
# Get token limits from query_param (with fallback to global_config)
max_total_tokens = getattr(
query_param, "max_total_tokens", global_config.get("MAX_TOTAL_TOKENS", 32000)
)
# Calculate conversation history tokens
history_context = ""
if query_param.conversation_history:
history_context = get_conversation_turns(
query_param.conversation_history, query_param.history_turns
)
history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
# Calculate system prompt template tokens (excluding content_data)
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Use the provided system prompt or default
sys_prompt_template = (
system_prompt if system_prompt else PROMPTS["naive_rag_response"]
)
# Create a sample system prompt with empty content_data to calculate overhead
sample_sys_prompt = sys_prompt_template.format(
content_data="", # Empty for overhead calculation
response_type=response_type,
history=history_context,
user_prompt=user_prompt,
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100 # Safety buffer
# Calculate available tokens for chunks
used_tokens = sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
logger.debug(
f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Process chunks using unified processing with dynamic token limit
processed_chunks = await process_chunks_unified(
query=query,
chunks=chunks,
query_param=query_param,
global_config=global_config,
source_type="vector",
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
logger.info(f"Final context: {len(processed_chunks)} chunks")
@@ -2548,7 +2718,9 @@ async def naive_query(
return sys_prompt
len_of_prompts = len(tokenizer.encode(query + sys_prompt))
logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
logger.debug(
f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
)
response = await use_model_func(
query,
@@ -2672,7 +2844,9 @@ async def kg_query_with_keywords(
tokenizer: Tokenizer = global_config["tokenizer"]
len_of_prompts = len(tokenizer.encode(query + sys_prompt))
logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
logger.debug(
f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
)
# 6. Generate response
response = await use_model_func(
@@ -2849,6 +3023,7 @@ async def process_chunks_unified(
query_param: QueryParam,
global_config: dict,
source_type: str = "mixed",
chunk_token_limit: int | None = None,  # Add parameter for dynamic token limit
) -> list[dict]:
"""
Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.
@@ -2859,6 +3034,7 @@ async def process_chunks_unified(
query_param: Query parameters containing configuration
global_config: Global configuration dictionary
source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
chunk_token_limit: Dynamic token limit for chunks (if None, uses default)
Returns:
Processed and filtered list of text chunks
@@ -2901,16 +3077,25 @@ async def process_chunks_unified(
# 4. Token-based final truncation
tokenizer = global_config.get("tokenizer")
if tokenizer and unique_chunks:
# Set default chunk_token_limit if not provided
if chunk_token_limit is None:
# Get default from query_param or global_config
chunk_token_limit = getattr(
query_param,
"max_total_tokens",
global_config.get("MAX_TOTAL_TOKENS", 32000),
)
original_count = len(unique_chunks)
unique_chunks = truncate_list_by_token_size(
unique_chunks,
key=lambda x: x.get("content", ""),
max_token_size=query_param.max_token_for_text_unit,
max_token_size=chunk_token_limit,
tokenizer=tokenizer,
)
logger.debug(
f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})"
f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
)
return unique_chunks

View file

@@ -90,12 +90,16 @@ export type QueryRequest = {
stream?: boolean
/** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
top_k?: number
/** Maximum number of tokens allowed for each retrieved text chunk. */
max_token_for_text_unit?: number
/** Maximum number of tokens allocated for relationship descriptions in global retrieval. */
max_token_for_global_context?: number
/** Maximum number of tokens allocated for entity descriptions in local retrieval. */
max_token_for_local_context?: number
/** Maximum number of text chunks to retrieve and process. */
chunk_top_k?: number
/** Number of text chunks to keep after reranking. */
chunk_rerank_top_k?: number
/** Maximum number of tokens allocated for entity context in unified token control system. */
max_entity_tokens?: number
/** Maximum number of tokens allocated for relationship context in unified token control system. */
max_relation_tokens?: number
/** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */
max_total_tokens?: number
/**
* Stores past conversation history to maintain context.
* Format: [{"role": "user/assistant", "content": "message"}].

View file

@@ -132,30 +132,81 @@ export default function QuerySettings() {
</div>
</>
{/* Chunk Top K */}
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="chunk_top_k" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.chunkTopK')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.chunkTopKTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
<NumberInput
id="chunk_top_k"
stepper={1}
value={querySettings.chunk_top_k}
onValueChange={(v) => handleChange('chunk_top_k', v)}
min={1}
placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')}
/>
</div>
</>
{/* Chunk Rerank Top K */}
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="chunk_rerank_top_k" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.chunkRerankTopK')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.chunkRerankTopKTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
<NumberInput
id="chunk_rerank_top_k"
stepper={1}
value={querySettings.chunk_rerank_top_k}
onValueChange={(v) => handleChange('chunk_rerank_top_k', v)}
min={1}
placeholder={t('retrievePanel.querySettings.chunkRerankTopKPlaceholder')}
/>
</div>
</>
{/* Max Tokens */}
<>
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="max_token_for_text_unit" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxTokensTextUnit')}
<label htmlFor="max_entity_tokens" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxEntityTokens')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}</p>
<p>{t('retrievePanel.querySettings.maxEntityTokensTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
{/* Removed sr-only label */}
<NumberInput
id="max_token_for_text_unit"
id="max_entity_tokens"
stepper={500}
value={querySettings.max_token_for_text_unit}
onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
value={querySettings.max_entity_tokens}
onValueChange={(v) => handleChange('max_entity_tokens', v)}
min={1}
placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
placeholder={t('retrievePanel.querySettings.maxEntityTokens')}
/>
</div>
</>
@@ -164,24 +215,23 @@
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="max_token_for_global_context" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxTokensGlobalContext')}
<label htmlFor="max_relation_tokens" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxRelationTokens')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}</p>
<p>{t('retrievePanel.querySettings.maxRelationTokensTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
{/* Removed sr-only label */}
<NumberInput
id="max_token_for_global_context"
id="max_relation_tokens"
stepper={500}
value={querySettings.max_token_for_global_context}
onValueChange={(v) => handleChange('max_token_for_global_context', v)}
value={querySettings.max_relation_tokens}
onValueChange={(v) => handleChange('max_relation_tokens', v)}
min={1}
placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
placeholder={t('retrievePanel.querySettings.maxRelationTokens')}
/>
</div>
</>
@@ -190,24 +240,23 @@
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="max_token_for_local_context" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxTokensLocalContext')}
<label htmlFor="max_total_tokens" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.maxTotalTokens')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}</p>
<p>{t('retrievePanel.querySettings.maxTotalTokensTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
{/* Removed sr-only label */}
<NumberInput
id="max_token_for_local_context"
stepper={500}
value={querySettings.max_token_for_local_context}
onValueChange={(v) => handleChange('max_token_for_local_context', v)}
id="max_total_tokens"
stepper={1000}
value={querySettings.max_total_tokens}
onValueChange={(v) => handleChange('max_total_tokens', v)}
min={1}
placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
placeholder={t('retrievePanel.querySettings.maxTotalTokens')}
/>
</div>
</>

View file

@@ -359,16 +359,22 @@
"singleParagraph": "فقرة واحدة",
"bulletPoints": "نقاط نقطية"
},
"topK": "أعلى K نتائج",
"topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'",
"topKPlaceholder": "عدد النتائج",
"maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص",
"maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع",
"maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي",
"maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي",
"maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي",
"maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي",
"historyTurns": "دورات التاريخ",
"topK": "أعلى K",
"topKTooltip": "عدد العناصر العلوية للاسترداد. يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.",
"topKPlaceholder": "أدخل قيمة أعلى k",
"chunkTopK": "أعلى K للقطع",
"chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.",
"chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع",
"chunkRerankTopK": "أعلى K لإعادة الترتيب",
"chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.",
"chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب",
"maxEntityTokens": "الحد الأقصى لرموز الكيان",
"maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز",
"maxRelationTokens": "الحد الأقصى لرموز العلاقة",
"maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز",
"maxTotalTokens": "إجمالي الحد الأقصى للرموز",
"maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)",
"historyTurns": "أدوار التاريخ",
"historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
"historyTurnsPlaceholder": "عدد دورات التاريخ",
"onlyNeedContext": "تحتاج فقط إلى السياق",

View file

@@ -359,15 +359,21 @@
"singleParagraph": "Single Paragraph",
"bulletPoints": "Bullet Points"
},
"topK": "Top K Results",
"topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
"topKPlaceholder": "Number of results",
"maxTokensTextUnit": "Max Tokens for Text Unit",
"maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
"maxTokensGlobalContext": "Max Tokens for Global Context",
"maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
"maxTokensLocalContext": "Max Tokens for Local Context",
"maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
"topK": "Top K",
"topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
"topKPlaceholder": "Enter top k value",
"chunkTopK": "Chunk Top K",
"chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.",
"chunkTopKPlaceholder": "Enter chunk top k value",
"chunkRerankTopK": "Chunk Rerank Top K",
"chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.",
"chunkRerankTopKPlaceholder": "Enter rerank top k value",
"maxEntityTokens": "Max Entity Tokens",
"maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system",
"maxRelationTokens": "Max Relation Tokens",
"maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system",
"maxTotalTokens": "Max Total Tokens",
"maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)",
"historyTurns": "History Turns",
"historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
"historyTurnsPlaceholder": "Number of history turns",

View file

@@ -359,15 +359,21 @@
"singleParagraph": "Paragraphe unique",
"bulletPoints": "Points à puces"
},
"topK": "Top K résultats",
"topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'",
"topKPlaceholder": "Nombre de résultats",
"maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte",
"maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré",
"maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global",
"maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale",
"maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local",
"maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale",
"topK": "Top K",
"topKTooltip": "Nombre d'éléments principaux à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'.",
"topKPlaceholder": "Entrez la valeur top k",
"chunkTopK": "Top K des Chunks",
"chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.",
"chunkTopKPlaceholder": "Entrez la valeur top k des chunks",
"chunkRerankTopK": "Top K du Reclassement",
"chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.",
"chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement",
"maxEntityTokens": "Limite de jetons d'entité",
"maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié",
"maxRelationTokens": "Limite de jetons de relation",
"maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié",
"maxTotalTokens": "Limite totale de jetons",
"maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)",
"historyTurns": "Tours d'historique",
"historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
"historyTurnsPlaceholder": "Nombre de tours d'historique",

View file

@@ -359,15 +359,21 @@
"singleParagraph": "单段落",
"bulletPoints": "要点"
},
"topK": "Top K结果",
"topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系",
"topKPlaceholder": "结果数量",
"maxTokensTextUnit": "文本单元最大令牌数",
"maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数",
"maxTokensGlobalContext": "全局上下文最大令牌数",
"maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数",
"maxTokensLocalContext": "本地上下文最大令牌数",
"maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数",
"topK": "Top K",
"topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。",
"topKPlaceholder": "输入top k值",
"chunkTopK": "文本块 Top K",
"chunkTopKTooltip": "检索和处理的最大文本块数量。",
"chunkTopKPlaceholder": "输入文本块top k值",
"chunkRerankTopK": "重排序 Top K",
"chunkRerankTopKTooltip": "重排序后保留的文本块数量。",
"chunkRerankTopKPlaceholder": "输入重排序top k值",
"maxEntityTokens": "实体令牌数上限",
"maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数",
"maxRelationTokens": "关系令牌数上限",
"maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数",
"maxTotalTokens": "总令牌数上限",
"maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)",
"historyTurns": "历史轮次",
"historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
"historyTurnsPlaceholder": "历史轮次数",

View file

@@ -300,7 +300,7 @@
"file_path": "來源",
"keywords": "Keys",
"weight": "權重"
}
}
},
"edge": {
"title": "關係",
@@ -359,15 +359,15 @@
"singleParagraph": "單段落",
"bulletPoints": "重點"
},
"topK": "Top K結果",
"topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係",
"topKPlaceholder": "結果數量",
"maxTokensTextUnit": "文字單元最大權杖數",
"maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數",
"maxTokensGlobalContext": "全域上下文最大權杖數",
"maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數",
"maxTokensLocalContext": "本地上下文最大權杖數",
"maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數",
"topK": "Top K",
"topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係",
"topKPlaceholder": "輸入top k值",
"chunkTopK": "文字區塊 Top K",
"chunkTopKTooltip": "檢索和處理的最大文字區塊數量。",
"chunkTopKPlaceholder": "輸入文字區塊top k值",
"chunkRerankTopK": "重新排序 Top K",
"chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。",
"chunkRerankTopKPlaceholder": "輸入重新排序top k值",
"historyTurns": "歷史輪次",
"historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
"historyTurnsPlaceholder": "歷史輪次數",
@@ -379,7 +379,13 @@
"streamResponseTooltip": "如果為True啟用即時串流輸出回應",
"userPrompt": "用戶提示詞",
"userPromptTooltip": "向LLM提供額外的響應要求與查詢內容無關僅用於處理輸出。",
"userPromptPlaceholder": "輸入自定義提示詞(可選)"
"userPromptPlaceholder": "輸入自定義提示詞(可選)",
"maxEntityTokens": "實體令牌數上限",
"maxEntityTokensTooltip": "統一令牌控制系統中分配給實體上下文的最大令牌數",
"maxRelationTokens": "關係令牌數上限",
"maxRelationTokensTooltip": "統一令牌控制系統中分配給關係上下文的最大令牌數",
"maxTotalTokens": "總令牌數上限",
"maxTotalTokensTooltip": "整個查詢上下文的最大總令牌預算(實體+關係+文檔塊+系統提示)"
}
},
"apiSite": {

View file

@@ -111,9 +111,11 @@ const useSettingsStoreBase = create<SettingsState>()(
mode: 'global',
response_type: 'Multiple Paragraphs',
top_k: 10,
max_token_for_text_unit: 6000,
max_token_for_global_context: 4000,
max_token_for_local_context: 4000,
chunk_top_k: 5,
chunk_rerank_top_k: 5,
max_entity_tokens: 10000,
max_relation_tokens: 10000,
max_total_tokens: 32000,
only_need_context: false,
only_need_prompt: false,
stream: true,
@@ -192,7 +194,7 @@ const useSettingsStoreBase = create<SettingsState>()(
{
name: 'settings-storage',
storage: createJSONStorage(() => localStorage),
version: 14,
version: 15,
migrate: (state: any, version: number) => {
if (version < 2) {
state.showEdgeLabel = false
@@ -215,9 +217,9 @@ const useSettingsStoreBase = create<SettingsState>()(
mode: 'global',
response_type: 'Multiple Paragraphs',
top_k: 10,
max_token_for_text_unit: 4000,
max_token_for_global_context: 4000,
max_token_for_local_context: 4000,
max_entity_tokens: 10000,
max_relation_tokens: 10000,
max_total_tokens: 32000,
only_need_context: false,
only_need_prompt: false,
stream: true,
@@ -260,6 +262,26 @@ const useSettingsStoreBase = create<SettingsState>()(
// Add backendMaxGraphNodes field for older versions
state.backendMaxGraphNodes = null
}
if (version < 15) {
// Fully migrate querySettings to the unified token control system
state.querySettings = {
mode: 'global',
response_type: 'Multiple Paragraphs',
top_k: 10,
chunk_top_k: 5,
chunk_rerank_top_k: 5,
max_entity_tokens: 10000,
max_relation_tokens: 10000,
max_total_tokens: 32000,
only_need_context: false,
only_need_prompt: false,
stream: true,
history_turns: 3,
hl_keywords: [],
ll_keywords: [],
user_prompt: ''
}
}
return state
}
}