diff --git a/README-zh.md b/README-zh.md
index 707c7ab5..42ab71f5 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -396,7 +396,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -425,7 +424,6 @@ rag = LightRAG(
     # 使用Hugging Face嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
        func=lambda texts: hf_embed(
            texts,
            tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -452,7 +450,6 @@ rag = LightRAG(
     # 使用Ollama嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -504,7 +501,6 @@ rag = LightRAG(
     # 使用Ollama嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -547,7 +543,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex兼容的完成函数
         embedding_func=EmbeddingFunc(  # LlamaIndex兼容的嵌入函数
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -809,7 +804,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
     ),
     embedding_func=EmbeddingFunc(
         embedding_dim=3072,
-        max_token_size=8192,
         func=lambda texts: openai_embed(
             texts,
             model="text-embedding-3-large",
diff --git a/README.md b/README.md
index 1af28d3b..74ac132c 100644
--- a/README.md
+++ b/README.md
@@ -397,7 +397,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -426,7 +425,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -455,7 +453,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -507,7 +504,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -550,7 +546,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -872,7 +867,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
     ),
     embedding_func=EmbeddingFunc(
         embedding_dim=3072,
-        max_token_size=8192,
         func=lambda texts: openai_embed(
             texts,
             model="text-embedding-3-large",
diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md
index e7955cf8..72d7da8e 100644
--- a/docs/DockerDeployment.md
+++ b/docs/DockerDeployment.md
@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
 - `MAX_ASYNC`: Maximum async operations
 - `MAX_TOKENS`: Maximum token size
 - `EMBEDDING_DIM`: Embedding dimensions
-- `MAX_EMBED_TOKENS`: Maximum embedding token size
 
 #### Security
 - `LIGHTRAG_API_KEY`: API key for authentication
diff --git a/env.example b/env.example
index 79cd1a13..850538fa 100644
--- a/env.example
+++ b/env.example
@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
 ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+
+### See also env.ollama-binding-options.example for fine-tuning Ollama
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Maximum tokens sent to Embedding for each chunk (no longer in use?)
-# MAX_EMBED_TOKENS=8192
 
 ### OpenAI compatible
 # EMBEDDING_BINDING=openai
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index cf8c0492..befedc42 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
     args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
     args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
-    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
 
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 8845e06c..5ac100a6 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -273,7 +273,6 @@ def create_app(args):
 
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        max_token_size=args.max_embed_tokens,
         func=lambda texts: lollms_embed(
             texts,
             embed_model=args.embedding_model,
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index ad5d13c9..90a1eb96 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.summary_language}")
     ASCIIColors.white("    ├─ Max Parallel Insert: ", end="")
     ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white("    ├─ Max Embed Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white("    ├─ Chunk Size: ", end="")
     ASCIIColors.yellow(f"{args.chunk_size}")
     ASCIIColors.white("    ├─ Chunk Overlap Size: ", end="")
diff --git a/lightrag/llm/Readme.md b/lightrag/llm/Readme.md
index 969d70e3..c907fd4d 100644
--- a/lightrag/llm/Readme.md
+++ b/lightrag/llm/Readme.md
@@ -58,7 +58,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=OpenAIEmbedding(
@@ -114,7 +113,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=LiteLLMEmbedding(
@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
 # Model Configuration
 LLM_MODEL=gpt-4
 EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_MAX_TOKEN_SIZE=8192
 ```
 
 ### Key Differences
diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index e2fcedbb..ecec0fcc 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -121,7 +121,7 @@ async def azure_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
diff --git a/lightrag/llm/bedrock.py b/lightrag/llm/bedrock.py
index d243983c..e1edc7ff 100644
--- a/lightrag/llm/bedrock.py
+++ b/lightrag/llm/bedrock.py
@@ -110,7 +110,7 @@ async def bedrock_complete(
     return result
 
 
-# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @wrap_embedding_func_with_attrs(embedding_dim=1024)
 # @retry(
 #     stop=stop_after_attempt(3),
 #     wait=wait_exponential(multiplier=1, min=4, max=10),
diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py
index 6a1e95d2..5a1b59fb 100644
--- a/lightrag/llm/jina.py
+++ b/lightrag/llm/jina.py
@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
     return data_list
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py
index b8dc39f3..a88d830f 100644
--- a/lightrag/llm/llama_index_impl.py
+++ b/lightrag/llm/llama_index_impl.py
@@ -170,7 +170,7 @@ async def llama_index_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/nvidia_openai.py b/lightrag/llm/nvidia_openai.py
index e711c4ac..1cbab380 100644
--- a/lightrag/llm/nvidia_openai.py
+++ b/lightrag/llm/nvidia_openai.py
@@ -33,7 +33,7 @@ from lightrag.utils import (
 import numpy as np
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index ff10d2f5..cedd804d 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/siliconcloud.py b/lightrag/llm/siliconcloud.py
index 41adb96e..fe8da0dd 100644
--- a/lightrag/llm/siliconcloud.py
+++ b/lightrag/llm/siliconcloud.py
@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
     texts: list[str],
     model: str = "netease-youdao/bce-embedding-base_v1",
     base_url: str = "https://api.siliconflow.cn/v1/embeddings",
-    max_token_size: int = 512,
+    max_token_size: int = 8192,
     api_key: str = None,
 ) -> np.ndarray:
     if api_key and not api_key.startswith("Bearer "):
diff --git a/lightrag/llm/zhipu.py b/lightrag/llm/zhipu.py
index dede42dc..c9d1253e 100644
--- a/lightrag/llm/zhipu.py
+++ b/lightrag/llm/zhipu.py
@@ -167,7 +167,7 @@ async def zhipu_complete(
     )
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1024)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 2fd67f0e..5f4c5a12 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
 @dataclass
 class EmbeddingFunc:
     embedding_dim: int
-    max_token_size: int
     func: callable
-    # concurrent_limit: int = 16
+    max_token_size: int | None = None  # deprecated, kept for backward compatibility only
 
     async def __call__(self, *args, **kwargs) -> np.ndarray:
         return await self.func(*args, **kwargs)
diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py
index 3b9944eb..8093a9ee 100644
--- a/reproduce/Step_1_openai_compatible.py
+++ b/reproduce/Step_1_openai_compatible.py
@@ -67,9 +67,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     await rag.initialize_storages()
diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py
index 56159ee1..d163cce9 100644
--- a/reproduce/Step_3_openai_compatible.py
+++ b/reproduce/Step_3_openai_compatible.py
@@ -92,9 +92,7 @@ if __name__ == "__main__":
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     query_param = QueryParam(mode=mode)
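
Note: after this patch, `EmbeddingFunc` takes only `embedding_dim` and `func`; `max_token_size` survives as an optional, ignored field so existing call sites keep working. A minimal sketch of both call styles, using the `openai_embed` wrapper that appears in the README hunks above:

```python
# Sketch of the post-patch API; EmbeddingFunc and openai_embed are the
# objects touched by this diff (lightrag.utils / lightrag.llm.openai).
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_embed

# New style: no max_token_size argument.
embedding_func = EmbeddingFunc(
    embedding_dim=3072,  # must match the embedding model's output width
    func=lambda texts: openai_embed(texts, model="text-embedding-3-large"),
)

# Old style still constructs without error: max_token_size is now an
# optional, deprecated field kept only for backward compatibility.
legacy_func = EmbeddingFunc(
    embedding_dim=3072,
    func=lambda texts: openai_embed(texts, model="text-embedding-3-large"),
    max_token_size=8192,  # accepted but ignored
)
```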