Update token limit

parent ba0cffd853
commit ef2115d437

14 changed files with 459 additions and 172 deletions

README-zh.md (14 changes)

@@ -304,16 +304,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
-    """Maximum number of tokens allowed for each retrieved text chunk."""
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""

-    max_token_for_global_context: int = int(
-        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""

-    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
-    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     hl_keywords: list[str] = field(default_factory=list)
     """List of high-level keywords to prioritize in retrieval."""
README.md (14 changes)

@@ -311,16 +311,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
-    """Maximum number of tokens allowed for each retrieved text chunk."""
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""

-    max_token_for_global_context: int = int(
-        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""

-    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
-    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     conversation_history: list[dict[str, str]] = field(default_factory=list)
     """Stores past conversation history to maintain context.
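As a usage sketch of the new fields (the import path follows the standard LightRAG README examples; the values shown are simply the documented defaults, not requirements):

```python
from lightrag import QueryParam  # import path as used in the README examples

# Unified token control: two per-section caps plus one overall budget that also
# has to cover the retrieved text chunks and the system prompt.
param = QueryParam(
    mode="hybrid",
    max_entity_tokens=10000,    # cap for the entity context
    max_relation_tokens=10000,  # cap for the relationship context
    max_total_tokens=32000,     # entities + relations + chunks + system prompt
)
```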

@@ -50,9 +50,12 @@ OLLAMA_EMULATING_MODEL_TAG=latest

 ### RAG Query Configuration
 # HISTORY_TURNS=3
-# MAX_TOKEN_TEXT_CHUNK=6000
-# MAX_TOKEN_RELATION_DESC=4000
-# MAX_TOKEN_ENTITY_DESC=4000
+
+### These parameters provide more precise control over total token usage
+# MAX_ENTITY_TOKENS=10000
+# MAX_RELATION_TOKENS=10000
+# MAX_TOTAL_TOKENS=32000
+
 # COSINE_THRESHOLD=0.2
 ### Number of entities or relations to retrieve from KG
 # TOP_K=60
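These variables feed the `os.getenv(...)` defaults shown in the QueryParam hunks above; a minimal sketch of that flow (variable names from this diff, values purely illustrative):

```python
import os

# Optional overrides; when unset, the dataclass falls back to the documented defaults.
os.environ["MAX_ENTITY_TOKENS"] = "8000"
os.environ["MAX_TOTAL_TOKENS"] = "24000"

# Same pattern the QueryParam defaults use:
max_entity_tokens = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))  # -> 8000
max_total_tokens = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))    # -> 24000
```

Note that these defaults are evaluated when the class is defined, so overrides need to be in the environment (or loaded from `.env`) before the package is imported.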

@@ -61,22 +61,22 @@ class QueryRequest(BaseModel):
         description="Number of text chunks to keep after reranking.",
     )

-    max_token_for_text_unit: Optional[int] = Field(
-        gt=1,
+    max_entity_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allowed for each retrieved text chunk.",
+        description="Maximum number of tokens allocated for entity context in unified token control system.",
+        ge=1,
     )

-    max_token_for_global_context: Optional[int] = Field(
-        gt=1,
+    max_relation_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allocated for relationship descriptions in global retrieval.",
+        description="Maximum number of tokens allocated for relationship context in unified token control system.",
+        ge=1,
     )

-    max_token_for_local_context: Optional[int] = Field(
-        gt=1,
+    max_total_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
+        description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
+        ge=1,
     )

     conversation_history: Optional[List[Dict[str, Any]]] = Field(
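A hedged example of passing the new limits through the query API (host, port, and the `/query` route are assumed from the LightRAG server defaults and the response field name is an assumption; adjust to your deployment):

```python
import requests

payload = {
    "query": "What are the main themes in the document?",
    "mode": "hybrid",
    "max_entity_tokens": 10000,
    "max_relation_tokens": 10000,
    "max_total_tokens": 32000,
}

# Assumed default host/port and /query route of the LightRAG API server.
resp = requests.post("http://localhost:9621/query", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json().get("response"))  # "response" key assumed from the server schema
```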

@@ -70,16 +70,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000"))
-    """Maximum number of tokens allowed for each retrieved text chunk."""
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""

-    max_token_for_global_context: int = int(
-        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""

-    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
-    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     hl_keywords: list[str] = field(default_factory=list)
     """List of high-level keywords to prioritize in retrieval."""

@@ -1569,7 +1569,9 @@ async def kg_query(

     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,

@@ -1692,7 +1694,9 @@ async def extract_keywords_only(

     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(kw_prompt))
-    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
+    )

     # 5. Call the LLM for keyword extraction
     if param.model_func:

@@ -1864,7 +1868,7 @@ async def _build_query_context(

     # Combine entities and relations contexts
     entities_context = process_combine_contexts(
-        hl_entities_context, ll_entities_context
+        ll_entities_context, hl_entities_context
     )
     relations_context = process_combine_contexts(
         hl_relations_context, ll_relations_context

@@ -1894,6 +1898,163 @@ async def _build_query_context(
         f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
     )

+    # Unified token control system - Apply precise token limits to entities and relations
+    tokenizer = text_chunks_db.global_config.get("tokenizer")
+    if tokenizer:
+        # Get new token limits from query_param (with fallback to global_config)
+        max_entity_tokens = getattr(
+            query_param,
+            "max_entity_tokens",
+            text_chunks_db.global_config.get("MAX_ENTITY_TOKENS", 8000),
+        )
+        max_relation_tokens = getattr(
+            query_param,
+            "max_relation_tokens",
+            text_chunks_db.global_config.get("MAX_RELATION_TOKENS", 6000),
+        )
+        max_total_tokens = getattr(
+            query_param,
+            "max_total_tokens",
+            text_chunks_db.global_config.get("MAX_TOTAL_TOKENS", 32000),
+        )
+
+        # Truncate entities based on complete JSON serialization
+        if entities_context:
+            original_entity_count = len(entities_context)
+            entities_context = truncate_list_by_token_size(
+                entities_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_entity_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(entities_context) < original_entity_count:
+                logger.debug(
+                    f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})"
+                )
+
+        # Truncate relations based on complete JSON serialization
+        if relations_context:
+            original_relation_count = len(relations_context)
+            relations_context = truncate_list_by_token_size(
+                relations_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_relation_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(relations_context) < original_relation_count:
+                logger.debug(
+                    f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})"
+                )
+
+        # Calculate dynamic token limit for text chunks
+        entities_str = json.dumps(entities_context, ensure_ascii=False)
+        relations_str = json.dumps(relations_context, ensure_ascii=False)
+
+        # Calculate base context tokens (entities + relations + template)
+        kg_context_template = """-----Entities(KG)-----
+
+```json
+{entities_str}
+```
+
+-----Relationships(KG)-----
+
+```json
+{relations_str}
+```
+
+-----Document Chunks(DC)-----
+
+```json
+[]
+```
+
+"""
+        kg_context = kg_context_template.format(
+            entities_str=entities_str, relations_str=relations_str
+        )
+        kg_context_tokens = len(tokenizer.encode(kg_context))
+
+        # Calculate actual system prompt overhead dynamically
+        # 1. Calculate conversation history tokens
+        history_context = ""
+        if query_param.conversation_history:
+            history_context = get_conversation_turns(
+                query_param.conversation_history, query_param.history_turns
+            )
+        history_tokens = (
+            len(tokenizer.encode(history_context)) if history_context else 0
+        )
+
+        # 2. Calculate system prompt template tokens (excluding context_data)
+        user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+        response_type = (
+            query_param.response_type
+            if query_param.response_type
+            else "Multiple Paragraphs"
+        )
+
+        # Get the system prompt template from PROMPTS
+        sys_prompt_template = text_chunks_db.global_config.get(
+            "system_prompt_template", PROMPTS["rag_response"]
+        )
+
+        # Create a sample system prompt with placeholders filled (excluding context_data)
+        sample_sys_prompt = sys_prompt_template.format(
+            history=history_context,
+            context_data="",  # Empty for overhead calculation
+            response_type=response_type,
+            user_prompt=user_prompt,
+        )
+        sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+        # Total system prompt overhead = template + query tokens
+        query_tokens = len(tokenizer.encode(query))
+        sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+        buffer_tokens = 100  # Safety buffer as requested
+
+        # Calculate available tokens for text chunks
+        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
+        available_chunk_tokens = max_total_tokens - used_tokens
+
+        logger.debug(
+            f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        )
+
+        # Re-process chunks with dynamic token limit
+        if text_units_context:
+            # Create a temporary query_param copy with adjusted chunk token limit
+            temp_chunks = [
+                {"content": chunk["content"], "file_path": chunk["file_path"]}
+                for chunk in text_units_context
+            ]
+
+            # Apply token truncation to chunks using the dynamic limit
+            truncated_chunks = await process_chunks_unified(
+                query=query,
+                chunks=temp_chunks,
+                query_param=query_param,
+                global_config=text_chunks_db.global_config,
+                source_type="mixed",
+                chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
+            )
+
+            # Rebuild text_units_context with truncated chunks
+            text_units_context = []
+            for i, chunk in enumerate(truncated_chunks):
+                text_units_context.append(
+                    {
+                        "id": i + 1,
+                        "content": chunk["content"],
+                        "file_path": chunk.get("file_path", "unknown_source"),
+                    }
+                )
+
+            logger.debug(
+                f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
+            )
+
     # not necessary to use LLM to generate a response
     if not entities_context and not relations_context:
         return None
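To make the budget arithmetic concrete, here is a small sketch of the allocation performed above (the numbers are invented for illustration; only the formula mirrors the code in this hunk):

```python
# Illustrative numbers only - real values come from the tokenizer at query time.
max_total_tokens = 32000     # overall budget (MAX_TOTAL_TOKENS)
kg_context_tokens = 9800     # entities + relations rendered into the KG template
sys_prompt_overhead = 2400   # system prompt template + history + query tokens
buffer_tokens = 100          # fixed safety margin

available_chunk_tokens = max_total_tokens - (
    kg_context_tokens + sys_prompt_overhead + buffer_tokens
)
print(available_chunk_tokens)  # 19700 tokens left for document chunks
```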

@@ -1982,18 +2143,6 @@ async def _get_node_data(
         knowledge_graph_inst,
     )

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     logger.info(
         f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )

@@ -2199,20 +2348,9 @@ async def _find_most_related_edges_from_entities(
             }
             all_edges_data.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
     all_edges_data = sorted(
         all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    all_edges_data = truncate_list_by_token_size(
-        all_edges_data,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )
-
-    logger.debug(
-        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
-    )

     return all_edges_data

@@ -2269,16 +2407,9 @@ async def _get_edge_data(
             }
             edge_datas.append(combined)

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    edge_datas = truncate_list_by_token_size(
-        edge_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
             edge_datas,

@@ -2388,18 +2519,6 @@ async def _find_most_related_entities_from_relationships(
         combined = {**node, "entity_name": entity_name, "rank": degree}
         node_datas.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     return node_datas

@@ -2491,13 +2610,64 @@ async def naive_query(
     if chunks is None or len(chunks) == 0:
         return PROMPTS["fail_response"]

-    # Process chunks using unified processing
+    # Calculate dynamic token limit for chunks
+    # Get token limits from query_param (with fallback to global_config)
+    max_total_tokens = getattr(
+        query_param, "max_total_tokens", global_config.get("MAX_TOTAL_TOKENS", 32000)
+    )
+
+    # Calculate conversation history tokens
+    history_context = ""
+    if query_param.conversation_history:
+        history_context = get_conversation_turns(
+            query_param.conversation_history, query_param.history_turns
+        )
+    history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
+
+    # Calculate system prompt template tokens (excluding content_data)
+    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    response_type = (
+        query_param.response_type
+        if query_param.response_type
+        else "Multiple Paragraphs"
+    )
+
+    # Use the provided system prompt or default
+    sys_prompt_template = (
+        system_prompt if system_prompt else PROMPTS["naive_rag_response"]
+    )
+
+    # Create a sample system prompt with empty content_data to calculate overhead
+    sample_sys_prompt = sys_prompt_template.format(
+        content_data="",  # Empty for overhead calculation
+        response_type=response_type,
+        history=history_context,
+        user_prompt=user_prompt,
+    )
+    sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+    # Total system prompt overhead = template + query tokens
+    query_tokens = len(tokenizer.encode(query))
+    sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+    buffer_tokens = 100  # Safety buffer
+
+    # Calculate available tokens for chunks
+    used_tokens = sys_prompt_overhead + buffer_tokens
+    available_chunk_tokens = max_total_tokens - used_tokens
+
+    logger.debug(
+        f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+    )
+
+    # Process chunks using unified processing with dynamic token limit
     processed_chunks = await process_chunks_unified(
         query=query,
         chunks=chunks,
         query_param=query_param,
         global_config=global_config,
         source_type="vector",
+        chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
     )

     logger.info(f"Final context: {len(processed_chunks)} chunks")
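The overhead measurement in both query paths amounts to "format the prompt with an empty context slot and count the tokens." A simplified stand-alone sketch of that idea, using tiktoken as a stand-in for the configured tokenizer and a made-up template (the real template and placeholders live in PROMPTS):

```python
import tiktoken  # stand-in; LightRAG uses its own configured Tokenizer instance

tokenizer = tiktoken.get_encoding("cl100k_base")

# Hypothetical prompt template with a context placeholder and a query placeholder.
template = "---Role---\nAnswer using the data below.\n{content_data}\n---Query---\n{query}"

# Fill every placeholder except the context to measure the fixed overhead.
empty_prompt = template.format(content_data="", query="What is LightRAG?")
sys_prompt_overhead = len(tokenizer.encode(empty_prompt))
print(sys_prompt_overhead)  # tokens consumed before any chunk is added
```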

@@ -2548,7 +2718,9 @@ async def naive_query(
         return sys_prompt

     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,

@@ -2672,7 +2844,9 @@ async def kg_query_with_keywords(

     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+    logger.debug(
+        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     # 6. Generate response
     response = await use_model_func(

@@ -2849,6 +3023,7 @@ async def process_chunks_unified(
     query_param: QueryParam,
     global_config: dict,
     source_type: str = "mixed",
+    chunk_token_limit: int = None,  # Add parameter for dynamic token limit
 ) -> list[dict]:
     """
     Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.

@@ -2859,6 +3034,7 @@ async def process_chunks_unified(
         query_param: Query parameters containing configuration
         global_config: Global configuration dictionary
         source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
+        chunk_token_limit: Dynamic token limit for chunks (if None, uses default)

     Returns:
         Processed and filtered list of text chunks

@@ -2901,16 +3077,25 @@ async def process_chunks_unified(
     # 4. Token-based final truncation
     tokenizer = global_config.get("tokenizer")
     if tokenizer and unique_chunks:
+        # Set default chunk_token_limit if not provided
+        if chunk_token_limit is None:
+            # Get default from query_param or global_config
+            chunk_token_limit = getattr(
+                query_param,
+                "max_total_tokens",
+                global_config.get("MAX_TOTAL_TOKENS", 32000),
+            )
+
         original_count = len(unique_chunks)
         unique_chunks = truncate_list_by_token_size(
             unique_chunks,
             key=lambda x: x.get("content", ""),
-            max_token_size=query_param.max_token_for_text_unit,
+            max_token_size=chunk_token_limit,
             tokenizer=tokenizer,
         )
         logger.debug(
             f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
-            f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})"
+            f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
        )

     return unique_chunks
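Every cap in this commit ultimately funnels into `truncate_list_by_token_size`. A hedged sketch of what such a helper does, written as a simplified reimplementation for illustration rather than the library's own code: it keeps items in order until the cumulative token count of `key(item)` would exceed the limit.

```python
from typing import Callable

def truncate_by_token_size(items: list, key: Callable[[dict], str],
                           max_token_size: int, encode: Callable[[str], list]) -> list:
    """Keep leading items whose cumulative token count stays within max_token_size."""
    total, kept = 0, []
    for item in items:
        total += len(encode(key(item)))
        if total > max_token_size:
            break
        kept.append(item)
    return kept

# Example with a toy "tokenizer" that splits on whitespace.
chunks = [{"content": "alpha beta gamma"}, {"content": "delta epsilon"}, {"content": "zeta"}]
kept = truncate_by_token_size(chunks, key=lambda c: c["content"],
                              max_token_size=4, encode=str.split)
print(len(kept))  # 1 - the second chunk would push the running total to 5 tokens
```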

@@ -90,12 +90,16 @@ export type QueryRequest = {
   stream?: boolean
   /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
   top_k?: number
-  /** Maximum number of tokens allowed for each retrieved text chunk. */
-  max_token_for_text_unit?: number
-  /** Maximum number of tokens allocated for relationship descriptions in global retrieval. */
-  max_token_for_global_context?: number
-  /** Maximum number of tokens allocated for entity descriptions in local retrieval. */
-  max_token_for_local_context?: number
+  /** Maximum number of text chunks to retrieve and process. */
+  chunk_top_k?: number
+  /** Number of text chunks to keep after reranking. */
+  chunk_rerank_top_k?: number
+  /** Maximum number of tokens allocated for entity context in unified token control system. */
+  max_entity_tokens?: number
+  /** Maximum number of tokens allocated for relationship context in unified token control system. */
+  max_relation_tokens?: number
+  /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */
+  max_total_tokens?: number
   /**
    * Stores past conversation history to maintain context.
    * Format: [{"role": "user/assistant", "content": "message"}].
|
|
@ -132,30 +132,81 @@ export default function QuerySettings() {
|
|||
</div>
|
||||
</>
|
||||
|
||||
{/* Chunk Top K */}
|
||||
<>
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<label htmlFor="chunk_top_k" className="ml-1 cursor-help">
|
||||
{t('retrievePanel.querySettings.chunkTopK')}
|
||||
</label>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="left">
|
||||
<p>{t('retrievePanel.querySettings.chunkTopKTooltip')}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
<div>
|
||||
<NumberInput
|
||||
id="chunk_top_k"
|
||||
stepper={1}
|
||||
value={querySettings.chunk_top_k}
|
||||
onValueChange={(v) => handleChange('chunk_top_k', v)}
|
||||
min={1}
|
||||
placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
|
||||
{/* Chunk Rerank Top K */}
|
||||
<>
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<label htmlFor="chunk_rerank_top_k" className="ml-1 cursor-help">
|
||||
{t('retrievePanel.querySettings.chunkRerankTopK')}
|
||||
</label>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="left">
|
||||
<p>{t('retrievePanel.querySettings.chunkRerankTopKTooltip')}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
<div>
|
||||
<NumberInput
|
||||
id="chunk_rerank_top_k"
|
||||
stepper={1}
|
||||
value={querySettings.chunk_rerank_top_k}
|
||||
onValueChange={(v) => handleChange('chunk_rerank_top_k', v)}
|
||||
min={1}
|
||||
placeholder={t('retrievePanel.querySettings.chunkRerankTopKPlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
|
||||
{/* Max Tokens */}
|
||||
<>
|
||||
<>
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<label htmlFor="max_token_for_text_unit" className="ml-1 cursor-help">
|
||||
{t('retrievePanel.querySettings.maxTokensTextUnit')}
|
||||
<label htmlFor="max_entity_tokens" className="ml-1 cursor-help">
|
||||
{t('retrievePanel.querySettings.maxEntityTokens')}
|
||||
</label>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="left">
|
||||
<p>{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}</p>
|
||||
<p>{t('retrievePanel.querySettings.maxEntityTokensTooltip')}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
<div>
|
||||
{/* Removed sr-only label */}
|
||||
<NumberInput
|
||||
id="max_token_for_text_unit"
|
||||
id="max_entity_tokens"
|
||||
stepper={500}
|
||||
value={querySettings.max_token_for_text_unit}
|
||||
onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
|
||||
value={querySettings.max_entity_tokens}
|
||||
onValueChange={(v) => handleChange('max_entity_tokens', v)}
|
||||
min={1}
|
||||
placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
|
||||
placeholder={t('retrievePanel.querySettings.maxEntityTokens')}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
|

@@ -164,24 +215,23 @@ export default function QuerySettings() {
           <TooltipProvider>
             <Tooltip>
               <TooltipTrigger asChild>
-                <label htmlFor="max_token_for_global_context" className="ml-1 cursor-help">
-                  {t('retrievePanel.querySettings.maxTokensGlobalContext')}
+                <label htmlFor="max_relation_tokens" className="ml-1 cursor-help">
+                  {t('retrievePanel.querySettings.maxRelationTokens')}
                 </label>
               </TooltipTrigger>
               <TooltipContent side="left">
-                <p>{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}</p>
+                <p>{t('retrievePanel.querySettings.maxRelationTokensTooltip')}</p>
               </TooltipContent>
             </Tooltip>
           </TooltipProvider>
           <div>
-            {/* Removed sr-only label */}
             <NumberInput
-              id="max_token_for_global_context"
+              id="max_relation_tokens"
               stepper={500}
-              value={querySettings.max_token_for_global_context}
-              onValueChange={(v) => handleChange('max_token_for_global_context', v)}
+              value={querySettings.max_relation_tokens}
+              onValueChange={(v) => handleChange('max_relation_tokens', v)}
               min={1}
-              placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
+              placeholder={t('retrievePanel.querySettings.maxRelationTokens')}
             />
           </div>
         </>

@@ -190,24 +240,23 @@ export default function QuerySettings() {
           <TooltipProvider>
             <Tooltip>
               <TooltipTrigger asChild>
-                <label htmlFor="max_token_for_local_context" className="ml-1 cursor-help">
-                  {t('retrievePanel.querySettings.maxTokensLocalContext')}
+                <label htmlFor="max_total_tokens" className="ml-1 cursor-help">
+                  {t('retrievePanel.querySettings.maxTotalTokens')}
                 </label>
               </TooltipTrigger>
               <TooltipContent side="left">
-                <p>{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}</p>
+                <p>{t('retrievePanel.querySettings.maxTotalTokensTooltip')}</p>
               </TooltipContent>
             </Tooltip>
           </TooltipProvider>
           <div>
-            {/* Removed sr-only label */}
             <NumberInput
-              id="max_token_for_local_context"
-              stepper={500}
-              value={querySettings.max_token_for_local_context}
-              onValueChange={(v) => handleChange('max_token_for_local_context', v)}
+              id="max_total_tokens"
+              stepper={1000}
+              value={querySettings.max_total_tokens}
+              onValueChange={(v) => handleChange('max_total_tokens', v)}
               min={1}
-              placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
+              placeholder={t('retrievePanel.querySettings.maxTotalTokens')}
            />
          </div>
        </>

@@ -359,16 +359,22 @@
     "singleParagraph": "فقرة واحدة",
     "bulletPoints": "نقاط نقطية"
   },
-  "topK": "أعلى K نتائج",
-  "topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'",
-  "topKPlaceholder": "عدد النتائج",
-  "maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص",
-  "maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع",
-  "maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي",
-  "maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي",
-  "maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي",
-  "maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي",
-  "historyTurns": "دورات التاريخ",
+  "topK": "أعلى K",
+  "topKTooltip": "عدد العناصر العلوية للاسترداد. يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.",
+  "topKPlaceholder": "أدخل قيمة أعلى k",
+  "chunkTopK": "أعلى K للقطع",
+  "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.",
+  "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع",
+  "chunkRerankTopK": "أعلى K لإعادة الترتيب",
+  "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.",
+  "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب",
+  "maxEntityTokens": "الحد الأقصى لرموز الكيان",
+  "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز",
+  "maxRelationTokens": "الحد الأقصى لرموز العلاقة",
+  "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز",
+  "maxTotalTokens": "إجمالي الحد الأقصى للرموز",
+  "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)",
+  "historyTurns": "أدوار التاريخ",
   "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
   "historyTurnsPlaceholder": "عدد دورات التاريخ",
   "onlyNeedContext": "تحتاج فقط إلى السياق",

@@ -359,15 +359,21 @@
     "singleParagraph": "Single Paragraph",
     "bulletPoints": "Bullet Points"
   },
-  "topK": "Top K Results",
-  "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
-  "topKPlaceholder": "Number of results",
-  "maxTokensTextUnit": "Max Tokens for Text Unit",
-  "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
-  "maxTokensGlobalContext": "Max Tokens for Global Context",
-  "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
-  "maxTokensLocalContext": "Max Tokens for Local Context",
-  "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
+  "topK": "Top K",
+  "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
+  "topKPlaceholder": "Enter top k value",
+  "chunkTopK": "Chunk Top K",
+  "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.",
+  "chunkTopKPlaceholder": "Enter chunk top k value",
+  "chunkRerankTopK": "Chunk Rerank Top K",
+  "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.",
+  "chunkRerankTopKPlaceholder": "Enter rerank top k value",
+  "maxEntityTokens": "Max Entity Tokens",
+  "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system",
+  "maxRelationTokens": "Max Relation Tokens",
+  "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system",
+  "maxTotalTokens": "Max Total Tokens",
+  "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)",
   "historyTurns": "History Turns",
   "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
   "historyTurnsPlaceholder": "Number of history turns",

@@ -359,15 +359,21 @@
     "singleParagraph": "Paragraphe unique",
     "bulletPoints": "Points à puces"
   },
-  "topK": "Top K résultats",
-  "topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'",
-  "topKPlaceholder": "Nombre de résultats",
-  "maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte",
-  "maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré",
-  "maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global",
-  "maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale",
-  "maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local",
-  "maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale",
+  "topK": "Top K",
+  "topKTooltip": "Nombre d'éléments principaux à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'.",
+  "topKPlaceholder": "Entrez la valeur top k",
+  "chunkTopK": "Top K des Chunks",
+  "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.",
+  "chunkTopKPlaceholder": "Entrez la valeur top k des chunks",
+  "chunkRerankTopK": "Top K du Reclassement",
+  "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.",
+  "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement",
+  "maxEntityTokens": "Limite de jetons d'entité",
+  "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié",
+  "maxRelationTokens": "Limite de jetons de relation",
+  "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié",
+  "maxTotalTokens": "Limite totale de jetons",
+  "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)",
   "historyTurns": "Tours d'historique",
   "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
   "historyTurnsPlaceholder": "Nombre de tours d'historique",

@@ -359,15 +359,21 @@
     "singleParagraph": "单段落",
     "bulletPoints": "要点"
   },
-  "topK": "Top K结果",
-  "topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系",
-  "topKPlaceholder": "结果数量",
-  "maxTokensTextUnit": "文本单元最大令牌数",
-  "maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数",
-  "maxTokensGlobalContext": "全局上下文最大令牌数",
-  "maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数",
-  "maxTokensLocalContext": "本地上下文最大令牌数",
-  "maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数",
+  "topK": "Top K",
+  "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。",
+  "topKPlaceholder": "输入top k值",
+  "chunkTopK": "文本块 Top K",
+  "chunkTopKTooltip": "检索和处理的最大文本块数量。",
+  "chunkTopKPlaceholder": "输入文本块top k值",
+  "chunkRerankTopK": "重排序 Top K",
+  "chunkRerankTopKTooltip": "重排序后保留的文本块数量。",
+  "chunkRerankTopKPlaceholder": "输入重排序top k值",
+  "maxEntityTokens": "实体令牌数上限",
+  "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数",
+  "maxRelationTokens": "关系令牌数上限",
+  "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数",
+  "maxTotalTokens": "总令牌数上限",
+  "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)",
   "historyTurns": "历史轮次",
   "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
   "historyTurnsPlaceholder": "历史轮次数",

@@ -300,7 +300,7 @@
     "file_path": "來源",
     "keywords": "Keys",
     "weight": "權重"
   }
   }
   },
   "edge": {
     "title": "關係",

@@ -359,15 +359,15 @@
     "singleParagraph": "單段落",
     "bulletPoints": "重點"
   },
-  "topK": "Top K結果",
-  "topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係",
-  "topKPlaceholder": "結果數量",
-  "maxTokensTextUnit": "文字單元最大權杖數",
-  "maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數",
-  "maxTokensGlobalContext": "全域上下文最大權杖數",
-  "maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數",
-  "maxTokensLocalContext": "本地上下文最大權杖數",
-  "maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數",
+  "topK": "Top K",
+  "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。",
+  "topKPlaceholder": "輸入top k值",
+  "chunkTopK": "文字區塊 Top K",
+  "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。",
+  "chunkTopKPlaceholder": "輸入文字區塊top k值",
+  "chunkRerankTopK": "重新排序 Top K",
+  "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。",
+  "chunkRerankTopKPlaceholder": "輸入重新排序top k值",
   "historyTurns": "歷史輪次",
   "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
   "historyTurnsPlaceholder": "歷史輪次數",

@@ -379,7 +379,13 @@
     "streamResponseTooltip": "如果為True,啟用即時串流輸出回應",
     "userPrompt": "用戶提示詞",
     "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。",
-    "userPromptPlaceholder": "輸入自定義提示詞(可選)"
+    "userPromptPlaceholder": "輸入自定義提示詞(可選)",
+    "maxEntityTokens": "實體令牌數上限",
+    "maxEntityTokensTooltip": "統一令牌控制系統中分配給實體上下文的最大令牌數",
+    "maxRelationTokens": "關係令牌數上限",
+    "maxRelationTokensTooltip": "統一令牌控制系統中分配給關係上下文的最大令牌數",
+    "maxTotalTokens": "總令牌數上限",
+    "maxTotalTokensTooltip": "整個查詢上下文的最大總令牌預算(實體+關係+文檔塊+系統提示)"
   }
   },
   "apiSite": {

@@ -111,9 +111,11 @@ const useSettingsStoreBase = create<SettingsState>()(
       mode: 'global',
       response_type: 'Multiple Paragraphs',
       top_k: 10,
-      max_token_for_text_unit: 6000,
-      max_token_for_global_context: 4000,
-      max_token_for_local_context: 4000,
+      chunk_top_k: 5,
+      chunk_rerank_top_k: 5,
+      max_entity_tokens: 10000,
+      max_relation_tokens: 10000,
+      max_total_tokens: 32000,
       only_need_context: false,
       only_need_prompt: false,
       stream: true,
@ -192,7 +194,7 @@ const useSettingsStoreBase = create<SettingsState>()(
|
|||
{
|
||||
name: 'settings-storage',
|
||||
storage: createJSONStorage(() => localStorage),
|
||||
version: 14,
|
||||
version: 15,
|
||||
migrate: (state: any, version: number) => {
|
||||
if (version < 2) {
|
||||
state.showEdgeLabel = false
|
||||
|
|

@@ -215,9 +217,9 @@ const useSettingsStoreBase = create<SettingsState>()(
          mode: 'global',
          response_type: 'Multiple Paragraphs',
          top_k: 10,
-          max_token_for_text_unit: 4000,
-          max_token_for_global_context: 4000,
-          max_token_for_local_context: 4000,
+          max_entity_tokens: 10000,
+          max_relation_tokens: 10000,
+          max_total_tokens: 32000,
          only_need_context: false,
          only_need_prompt: false,
          stream: true,

@@ -260,6 +262,26 @@ const useSettingsStoreBase = create<SettingsState>()(
          // Add backendMaxGraphNodes field for older versions
          state.backendMaxGraphNodes = null
        }
+        if (version < 15) {
+          // Fully migrate querySettings to the unified token control system
+          state.querySettings = {
+            mode: 'global',
+            response_type: 'Multiple Paragraphs',
+            top_k: 10,
+            chunk_top_k: 5,
+            chunk_rerank_top_k: 5,
+            max_entity_tokens: 10000,
+            max_relation_tokens: 10000,
+            max_total_tokens: 32000,
+            only_need_context: false,
+            only_need_prompt: false,
+            stream: true,
+            history_turns: 3,
+            hl_keywords: [],
+            ll_keywords: [],
+            user_prompt: ''
+          }
+        }
        return state
      }
    }