refactor: Remove deprecated max_token_size from embedding configuration
This parameter is no longer used. Its removal simplifies the API and clarifies that token length management is handled by upstream text chunking logic rather than the embedding wrapper.
Parent: d26d413d97
Commit: 9923821d75
19 changed files with 13 additions and 40 deletions
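
For illustration, here is a minimal before/after sketch of the affected constructor call. The embedding callable `my_embed` is a hypothetical stand-in; the `EmbeddingFunc` signature follows the examples updated in this commit:

```python
import numpy as np

from lightrag.utils import EmbeddingFunc


async def my_embed(texts: list[str]) -> np.ndarray:
    # Hypothetical embedding callable; a real deployment would call a model API here.
    return np.zeros((len(texts), 1024))


# Before: callers passed a token limit that the wrapper never enforced.
# embedding_func = EmbeddingFunc(embedding_dim=1024, max_token_size=8192, func=my_embed)

# After: only the dimension and the callable are needed; upstream chunking
# is responsible for keeping inputs within the model's token limit.
embedding_func = EmbeddingFunc(embedding_dim=1024, func=my_embed)
```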
```diff
@@ -396,7 +396,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -425,7 +424,6 @@ rag = LightRAG(
     # Use the Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -452,7 +450,6 @@ rag = LightRAG(
     # Use the Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -504,7 +501,6 @@ rag = LightRAG(
     # Use the Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -547,7 +543,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -809,7 +804,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1229,7 +1223,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
```
```diff
@@ -397,7 +397,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -426,7 +425,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -455,7 +453,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -507,7 +504,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -550,7 +546,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -872,7 +867,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
```
```diff
@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
 - `MAX_ASYNC`: Maximum async operations
 - `MAX_TOKENS`: Maximum token size
 - `EMBEDDING_DIM`: Embedding dimensions
-- `MAX_EMBED_TOKENS`: Maximum embedding token size
 
 #### Security
 - `LIGHTRAG_API_KEY`: API key for authentication
```
```diff
@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
 ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+
+### see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Maximum tokens sent to Embedding for each chunk (no longer in use?)
-# MAX_EMBED_TOKENS=8192
 
 ### OpenAI compatible
 # EMBEDDING_BINDING=openai
```
```diff
@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
     args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
    args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
-    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
 
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
```
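
For context, `get_env_value` reads a setting from the environment with a default and an optional type cast. The exact implementation lives in LightRAG's API config module; the sketch below is an illustrative assumption, not the project's code:

```python
import os
from typing import Any, Callable


def get_env_value(name: str, default: Any, cast: Callable[[str], Any] = str) -> Any:
    # Fall back to the default when the variable is unset; cast the raw string otherwise.
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default
```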
```diff
@@ -273,7 +273,6 @@ def create_app(args):
 
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        max_token_size=args.max_embed_tokens,
         func=lambda texts: lollms_embed(
             texts,
             embed_model=args.embedding_model,
```
```diff
@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.summary_language}")
     ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
     ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white(" ├─ Chunk Size: ", end="")
     ASCIIColors.yellow(f"{args.chunk_size}")
     ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
```
````diff
@@ -58,7 +58,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=OpenAIEmbedding(
@@ -114,7 +113,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=LiteLLMEmbedding(
@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
 # Model Configuration
 LLM_MODEL=gpt-4
 EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_MAX_TOKEN_SIZE=8192
 ```
 
 ### Key Differences
````
```diff
@@ -121,7 +121,7 @@ async def azure_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
```
```diff
@@ -110,7 +110,7 @@ async def bedrock_complete(
     return result
 
 
-# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @wrap_embedding_func_with_attrs(embedding_dim=1024)
 # @retry(
 #     stop=stop_after_attempt(3),
 #     wait=wait_exponential(multiplier=1, min=4, max=10),
```
```diff
@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
     return data_list
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -170,7 +170,7 @@ async def llama_index_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -33,7 +33,7 @@ from lightrag.utils import (
 import numpy as np
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
     texts: list[str],
     model: str = "netease-youdao/bce-embedding-base_v1",
     base_url: str = "https://api.siliconflow.cn/v1/embeddings",
-    max_token_size: int = 512,
+    max_token_size: int = 8192,
     api_key: str = None,
 ) -> np.ndarray:
     if api_key and not api_key.startswith("Bearer "):
```
```diff
@@ -167,7 +167,7 @@ async def zhipu_complete(
     )
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1024)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
 @dataclass
 class EmbeddingFunc:
     embedding_dim: int
-    max_token_size: int
     func: callable
-    # concurrent_limit: int = 16
+    max_token_size: int | None = None  # deprecated keep it for compatible only
 
     async def __call__(self, *args, **kwargs) -> np.ndarray:
         return await self.func(*args, **kwargs)
```
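
Keeping the field as an optional no-op preserves backward compatibility: constructing `EmbeddingFunc` with `max_token_size` still works, and the value is simply stored and ignored. A minimal sketch, with `my_embed` as a stand-in callable:

```python
import asyncio

import numpy as np

from lightrag.utils import EmbeddingFunc


async def my_embed(texts: list[str]) -> np.ndarray:
    # Stand-in callable returning fixed-size vectors.
    return np.zeros((len(texts), 8))


# Legacy call sites keep working; max_token_size is retained but no longer consulted.
legacy = EmbeddingFunc(embedding_dim=8, func=my_embed, max_token_size=8192)
current = EmbeddingFunc(embedding_dim=8, func=my_embed)

print(asyncio.run(legacy(["hello"])).shape)   # (1, 8)
print(asyncio.run(current(["hello"])).shape)  # (1, 8)
```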
```diff
@@ -67,9 +67,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     await rag.initialize_storages()
```
```diff
@@ -92,9 +92,7 @@ if __name__ == "__main__":
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
     query_param = QueryParam(mode=mode)
 
```