From 9923821d758af62fd47180bee3d091ef24c07e4e Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 29 Jul 2025 10:49:35 +0800 Subject: [PATCH] refactor: Remove deprecated `max_token_size` from embedding configuration This parameter is no longer used. Its removal simplifies the API and clarifies that token length management is handled by upstream text chunking logic rather than the embedding wrapper. --- README-zh.md | 7 ------- README.md | 7 ------- docs/DockerDeployment.md | 1 - env.example | 4 ++-- lightrag/api/config.py | 1 - lightrag/api/lightrag_server.py | 1 - lightrag/api/utils_api.py | 2 -- lightrag/llm/Readme.md | 3 --- lightrag/llm/azure_openai.py | 2 +- lightrag/llm/bedrock.py | 2 +- lightrag/llm/jina.py | 2 +- lightrag/llm/llama_index_impl.py | 2 +- lightrag/llm/nvidia_openai.py | 2 +- lightrag/llm/openai.py | 2 +- lightrag/llm/siliconcloud.py | 2 +- lightrag/llm/zhipu.py | 2 +- lightrag/utils.py | 3 +-- reproduce/Step_1_openai_compatible.py | 4 +--- reproduce/Step_3_openai_compatible.py | 4 +--- 19 files changed, 13 insertions(+), 40 deletions(-) diff --git a/README-zh.md b/README-zh.md index 707c7ab5..42ab71f5 100644 --- a/README-zh.md +++ b/README-zh.md @@ -396,7 +396,6 @@ async def initialize_rag(): llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=4096, - max_token_size=8192, func=embedding_func ) ) @@ -425,7 +424,6 @@ rag = LightRAG( # 使用Hugging Face嵌入函数 embedding_func=EmbeddingFunc( embedding_dim=384, - max_token_size=5000, func=lambda texts: hf_embed( texts, tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), @@ -452,7 +450,6 @@ rag = LightRAG( # 使用Ollama嵌入函数 embedding_func=EmbeddingFunc( embedding_dim=768, - max_token_size=8192, func=lambda texts: ollama_embed( texts, embed_model="nomic-embed-text" @@ -504,7 +501,6 @@ rag = LightRAG( # 使用Ollama嵌入函数 embedding_func=EmbeddingFunc( embedding_dim=768, - max_token_size=8192, func=lambda texts: ollama_embed( texts, embed_model="nomic-embed-text" @@ 
-547,7 +543,6 @@ async def initialize_rag(): llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数 embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数 embedding_dim=1536, - max_token_size=8192, func=lambda texts: llama_index_embed(texts, embed_model=embed_model) ), ) @@ -809,7 +804,6 @@ rag = LightRAG( llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=384, - max_token_size=8192, func=embedding_func, ), vector_storage="FaissVectorDBStorage", @@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现 ), embedding_func=EmbeddingFunc( embedding_dim=3072, - max_token_size=8192, func=lambda texts: openai_embed( texts, model="text-embedding-3-large", diff --git a/README.md b/README.md index 1af28d3b..74ac132c 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,6 @@ async def initialize_rag(): llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=4096, - max_token_size=8192, func=embedding_func ) ) @@ -426,7 +425,6 @@ rag = LightRAG( # Use Hugging Face embedding function embedding_func=EmbeddingFunc( embedding_dim=384, - max_token_size=5000, func=lambda texts: hf_embed( texts, tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), @@ -455,7 +453,6 @@ rag = LightRAG( # Use Ollama embedding function embedding_func=EmbeddingFunc( embedding_dim=768, - max_token_size=8192, func=lambda texts: ollama_embed( texts, embed_model="nomic-embed-text" @@ -507,7 +504,6 @@ rag = LightRAG( # Use Ollama embedding function embedding_func=EmbeddingFunc( embedding_dim=768, - max_token_size=8192, func=lambda texts: ollama_embed( texts, embed_model="nomic-embed-text" @@ -550,7 +546,6 @@ async def initialize_rag(): llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function embedding_dim=1536, - max_token_size=8192, func=lambda texts: llama_index_embed(texts, 
embed_model=embed_model) ), ) @@ -872,7 +867,6 @@ rag = LightRAG( llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=384, - max_token_size=8192, func=embedding_func, ), vector_storage="FaissVectorDBStorage", @@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/ ), embedding_func=EmbeddingFunc( embedding_dim=3072, - max_token_size=8192, func=lambda texts: openai_embed( texts, model="text-embedding-3-large", diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md index e7955cf8..72d7da8e 100644 --- a/docs/DockerDeployment.md +++ b/docs/DockerDeployment.md @@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file: - `MAX_ASYNC`: Maximum async operations - `MAX_TOKENS`: Maximum token size - `EMBEDDING_DIM`: Embedding dimensions -- `MAX_EMBED_TOKENS`: Maximum embedding token size #### Security - `LIGHTRAG_API_KEY`: API key for authentication diff --git a/env.example b/env.example index 79cd1a13..850538fa 100644 --- a/env.example +++ b/env.example @@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key ### Embedding Configuration (Should not be changed after the first file processed) #################################################################################### ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina + +### see also env.ollama-binding-options.example for fine tuning ollama EMBEDDING_BINDING=ollama EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 EMBEDDING_BINDING_API_KEY=your_api_key # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost EMBEDDING_BINDING_HOST=http://localhost:11434 -### Maximum tokens sent to Embedding for each chunk (no longer in use?) 
-# MAX_EMBED_TOKENS=8192 ### OpenAI compatible # EMBEDDING_BINDING=openai diff --git a/lightrag/api/config.py b/lightrag/api/config.py index cf8c0492..befedc42 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace: args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest") args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest") args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int) - args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int) # Inject chunk configuration args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 8845e06c..5ac100a6 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -273,7 +273,6 @@ def create_app(args): embedding_func = EmbeddingFunc( embedding_dim=args.embedding_dim, - max_token_size=args.max_embed_tokens, func=lambda texts: lollms_embed( texts, embed_model=args.embedding_model, diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index ad5d13c9..90a1eb96 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.summary_language}") ASCIIColors.white(" ├─ Max Parallel Insert: ", end="") ASCIIColors.yellow(f"{args.max_parallel_insert}") - ASCIIColors.white(" ├─ Max Embed Tokens: ", end="") - ASCIIColors.yellow(f"{args.max_embed_tokens}") ASCIIColors.white(" ├─ Chunk Size: ", end="") ASCIIColors.yellow(f"{args.chunk_size}") ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="") diff --git a/lightrag/llm/Readme.md b/lightrag/llm/Readme.md index 969d70e3..c907fd4d 100644 --- a/lightrag/llm/Readme.md +++ b/lightrag/llm/Readme.md @@ -58,7 +58,6 @@ rag = LightRAG( llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=1536, - max_token_size=8192, func=lambda 
texts: llama_index_embed( texts, embed_model=OpenAIEmbedding( @@ -114,7 +113,6 @@ rag = LightRAG( llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=1536, - max_token_size=8192, func=lambda texts: llama_index_embed( texts, embed_model=LiteLLMEmbedding( @@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key # Model Configuration LLM_MODEL=gpt-4 EMBEDDING_MODEL=text-embedding-3-large -EMBEDDING_MAX_TOKEN_SIZE=8192 ``` ### Key Differences diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py index e2fcedbb..ecec0fcc 100644 --- a/lightrag/llm/azure_openai.py +++ b/lightrag/llm/azure_openai.py @@ -121,7 +121,7 @@ async def azure_openai_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191) +@wrap_embedding_func_with_attrs(embedding_dim=1536) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), diff --git a/lightrag/llm/bedrock.py b/lightrag/llm/bedrock.py index d243983c..e1edc7ff 100644 --- a/lightrag/llm/bedrock.py +++ b/lightrag/llm/bedrock.py @@ -110,7 +110,7 @@ async def bedrock_complete( return result -# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +# @wrap_embedding_func_with_attrs(embedding_dim=1024) # @retry( # stop=stop_after_attempt(3), # wait=wait_exponential(multiplier=1, min=4, max=10), diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py index 6a1e95d2..5a1b59fb 100644 --- a/lightrag/llm/jina.py +++ b/lightrag/llm/jina.py @@ -35,7 +35,7 @@ async def fetch_data(url, headers, data): return data_list -@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=2048) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py index b8dc39f3..a88d830f 100644 --- a/lightrag/llm/llama_index_impl.py +++ b/lightrag/llm/llama_index_impl.py @@ -170,7 +170,7 
@@ async def llama_index_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=1536) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/nvidia_openai.py b/lightrag/llm/nvidia_openai.py index e711c4ac..1cbab380 100644 --- a/lightrag/llm/nvidia_openai.py +++ b/lightrag/llm/nvidia_openai.py @@ -33,7 +33,7 @@ from lightrag.utils import ( import numpy as np -@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512) +@wrap_embedding_func_with_attrs(embedding_dim=2048) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index ff10d2f5..cedd804d 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -432,7 +432,7 @@ async def nvidia_openai_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=1536) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/siliconcloud.py b/lightrag/llm/siliconcloud.py index 41adb96e..fe8da0dd 100644 --- a/lightrag/llm/siliconcloud.py +++ b/lightrag/llm/siliconcloud.py @@ -40,7 +40,7 @@ async def siliconcloud_embedding( texts: list[str], model: str = "netease-youdao/bce-embedding-base_v1", base_url: str = "https://api.siliconflow.cn/v1/embeddings", - max_token_size: int = 512, + max_token_size: int = 8192, api_key: str = None, ) -> np.ndarray: if api_key and not api_key.startswith("Bearer "): diff --git a/lightrag/llm/zhipu.py b/lightrag/llm/zhipu.py index dede42dc..c9d1253e 100644 --- a/lightrag/llm/zhipu.py +++ b/lightrag/llm/zhipu.py @@ -167,7 +167,7 @@ async def zhipu_complete( ) -@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=1024) @retry( 
stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/utils.py b/lightrag/utils.py index 2fd67f0e..5f4c5a12 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -237,9 +237,8 @@ class UnlimitedSemaphore: @dataclass class EmbeddingFunc: embedding_dim: int - max_token_size: int func: callable - # concurrent_limit: int = 16 + max_token_size: int | None = None # deprecated, kept for backward compatibility only async def __call__(self, *args, **kwargs) -> np.ndarray: return await self.func(*args, **kwargs) diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py index 3b9944eb..8093a9ee 100644 --- a/reproduce/Step_1_openai_compatible.py +++ b/reproduce/Step_1_openai_compatible.py @@ -67,9 +67,7 @@ async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=4096, max_token_size=8192, func=embedding_func - ), + embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func), ) await rag.initialize_storages() diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py index 56159ee1..d163cce9 100644 --- a/reproduce/Step_3_openai_compatible.py +++ b/reproduce/Step_3_openai_compatible.py @@ -92,9 +92,7 @@ if __name__ == "__main__": rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=4096, max_token_size=8192, func=embedding_func - ), + embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func), ) query_param = QueryParam(mode=mode)