diff --git a/env.example b/env.example
index fea99953..4c8d355d 100644
--- a/env.example
+++ b/env.example
@@ -23,13 +23,13 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # WORKING_DIR=

 ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
-# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+# TIKTOKEN_CACHE_DIR=./temp/tiktoken

 ### Ollama Emulating Model and Tag
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest

-### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
+### Max nodes return from graph retrieval in webui
 # MAX_GRAPH_NODES=1000

 ### Logging level
@@ -50,8 +50,6 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256

 ### API-Key to access LightRAG Server API
-### Use this key in HTTP requests with the 'X-API-Key' header
-### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*

@@ -76,6 +74,11 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens send to LLM (include entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000

+### maximum number of related chunks per source entity or relation
+### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+### Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
 ### chunk selection strategies
 ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval
 ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM
@@ -121,9 +124,6 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English

-### PDF decryption password for protected PDF files
-# PDF_DECRYPT_PASSWORD=your_pdf_password_here
-
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'

@@ -140,22 +140,6 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000

-### control the maximum chunk_ids stored in vector and graph db
-# MAX_SOURCE_IDS_PER_ENTITY=300
-# MAX_SOURCE_IDS_PER_RELATION=300
-### control chunk_ids limitation method: FIFO, KEEP
-### FIFO: First in first out
-### KEEP: Keep oldest (less merge action and faster)
-# SOURCE_IDS_LIMIT_METHOD=FIFO
-
-# Maximum number of file paths stored in entity/relation file_path field (For displayed only, does not affect query performance)
-# MAX_FILE_PATHS=100
-
-### maximum number of related chunks per source entity or relation
-### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
-### Higher values increase re-ranking time
-# RELATED_CHUNK_NUMBER=5
-
 ###############################
 ### Concurrency Configuration
 ###############################
@@ -168,13 +152,10 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10

-###########################################################################
+###########################################################
 ### LLM Configuration
-### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
-### If LightRAG deployed in Docker:
-### uses host.docker.internal instead of localhost in LLM_BINDING_HOST
-###########################################################################
+### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
+###########################################################

 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180
@@ -183,13 +164,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key

-### Azure OpenAI example
-### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
+### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# LLM_BINDING=azure_openai
-# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
-# LLM_BINDING_API_KEY=your_api_key
-# LLM_MODEL=my-gpt-mini-deployment
+# AZURE_OPENAI_DEPLOYMENT=gpt-4o

 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@@ -197,21 +174,18 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai

-### Gemini example
-# LLM_BINDING=gemini
-# LLM_MODEL=gemini-flash-latest
-# LLM_BINDING_API_KEY=your_gemini_api_key
-# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
+### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
+# OPENAI_LLM_MAX_TOKENS=9000
+### For OpenAI o1-mini or newer modles
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

-### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
-### lightrag-server --llm-binding gemini --help
-### Gemini Specific Parameters
-# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
-# GEMINI_LLM_TEMPERATURE=0.7
-### Enable Thinking
-# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
-### Disable Thinking
-# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
+# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@@ -222,16 +196,6 @@ LLM_BINDING_API_KEY=your_api_key
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles utilizes max_completion_tokens instead of max_tokens
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
@@ -245,56 +209,30 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0

-#######################################################################################
+####################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
-### If LightRAG deployed in Docker:
-### uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
-#######################################################################################
+####################################################################################
 # EMBEDDING_TIMEOUT=30
-
-### Control whether to send embedding_dim parameter to embedding API
-### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
-### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
-### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
-### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
-
-# Ollama embedding
-# EMBEDDING_BINDING=ollama
-# EMBEDDING_MODEL=bge-m3:latest
-# EMBEDDING_DIM=1024
-# EMBEDDING_BINDING_API_KEY=your_api_key
-### If LightRAG deployed in Docker uses host.docker.internal instead of localhost
-# EMBEDDING_BINDING_HOST=http://localhost:11434
-
-### OpenAI compatible embedding
-EMBEDDING_BINDING=openai
-EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_DIM=3072
-EMBEDDING_SEND_DIM=false
-EMBEDDING_TOKEN_LIMIT=8192
-EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING=ollama
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
+# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
+EMBEDDING_BINDING_HOST=http://localhost:11434

-### Optional for Azure embedding
-### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
-# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
-# EMBEDDING_BINDING=azure_openai
-# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
-# EMBEDDING_API_KEY=your_api_key
-# EMBEDDING_MODEL==my-text-embedding-3-large-deployment
+### OpenAI compatible (VoyageAI embedding openai compatible)
+# EMBEDDING_BINDING=openai
+# EMBEDDING_MODEL=text-embedding-3-large
 # EMBEDDING_DIM=3072
-
-### Gemini embedding
-# EMBEDDING_BINDING=gemini
-# EMBEDDING_MODEL=gemini-embedding-001
-# EMBEDDING_DIM=1536
-# EMBEDDING_TOKEN_LIMIT=2048
-# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
+# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 # EMBEDDING_BINDING_API_KEY=your_api_key
-### Gemini embedding requires sending dimension to server
-# EMBEDDING_SEND_DIM=true
+
+### Optional for Azure
+# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
+# AZURE_EMBEDDING_API_VERSION=2023-05-15
+# AZURE_EMBEDDING_ENDPOINT=your_endpoint
+# AZURE_EMBEDDING_API_KEY=your_api_key

 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
@@ -356,18 +294,14 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-### DB specific workspace should not be set, keep for compatible only
-### POSTGRES_WORKSPACE=forced_workspace_name
+# POSTGRES_WORKSPACE=forced_workspace_name

 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat, VCHORDRQ
+### Vector storage type: HNSW, IVFFlat
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
-POSTGRES_VCHORDRQ_BUILD_OPTIONS=
-POSTGRES_VCHORDRQ_PROBES=
-POSTGRES_VCHORDRQ_EPSILON=1.9

 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@@ -406,8 +340,7 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-### DB specific workspace should not be set, keep for compatible only
-### NEO4J_WORKSPACE=forced_workspace_name
+# NEO4J_WORKSPACE=forced_workspace_name

 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@@ -421,14 +354,12 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-### DB specific workspace should not be set, keep for compatible only
-### MILVUS_WORKSPACE=forced_workspace_name
+# MILVUS_WORKSPACE=forced_workspace_name

 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-### DB specific workspace should not be set, keep for compatible only
-### QDRANT_WORKSPACE=forced_workspace_name
+# QDRANT_WORKSPACE=forced_workspace_name

 ### Redis
 REDIS_URI=redis://localhost:6379
@@ -436,45 +367,11 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-### DB specific workspace should not be set, keep for compatible only
-### REDIS_WORKSPACE=forced_workspace_name
+# REDIS_WORKSPACE=forced_workspace_name

 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-### DB specific workspace should not be set, keep for compatible only
-### MEMGRAPH_WORKSPACE=forced_workspace_name
-
-############################
-### Evaluation Configuration
-############################
-### RAGAS evaluation models (used for RAG quality assessment)
-### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
-### Default uses OpenAI models for evaluation
-
-### LLM Configuration for Evaluation
-# EVAL_LLM_MODEL=gpt-4o-mini
-### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
-# EVAL_LLM_BINDING_API_KEY=your_api_key
-### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
-# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
-
-### Embedding Configuration for Evaluation
-# EVAL_EMBEDDING_MODEL=text-embedding-3-large
-### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
-# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
-### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
-# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
-
-### Performance Tuning
-### Number of concurrent test case evaluations
-### Lower values reduce API rate limit issues but increase evaluation time
-# EVAL_MAX_CONCURRENT=2
-### TOP_K query parameter of LightRAG (default: 10)
-### Number of entities or relations retrieved from KG
-# EVAL_QUERY_TOP_K=10
-### LLM request retry and timeout settings for evaluation
-# EVAL_LLM_MAX_RETRIES=5
-# EVAL_LLM_TIMEOUT=180
+# MEMGRAPH_WORKSPACE=forced_workspace_name