Merge pull request #1875 from danielaskdd/remove-embedding-max-token-size

refactor: Remove deprecated `max_token_size` from embedding configura…
Daniel.y 2025-07-29 11:23:54 +08:00 committed by GitHub
commit 7a5df185a5
19 changed files with 13 additions and 40 deletions
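In practical terms, an `EmbeddingFunc` is now declared from the embedding dimension and the embedding callable alone; the token limit is no longer part of the configuration. A minimal, illustrative sketch of the migration (`my_embed` below is a hypothetical placeholder for a real embedding function such as `openai_embed` or `ollama_embed`):

```python
import numpy as np
from lightrag.utils import EmbeddingFunc

async def my_embed(texts: list[str]) -> np.ndarray:
    # Hypothetical stand-in for a real async embedding backend.
    return np.zeros((len(texts), 1024))

# Before this PR (max_token_size was part of the configuration):
#   EmbeddingFunc(embedding_dim=1024, max_token_size=8192, func=my_embed)
# After this PR, only the dimension and the callable are needed:
embedding_func = EmbeddingFunc(embedding_dim=1024, func=my_embed)
```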

View file

@@ -396,7 +396,6 @@ async def initialize_rag():
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
max_token_size=8192,
func=embedding_func
)
)
@@ -425,7 +424,6 @@ rag = LightRAG(
# Use Hugging Face embedding function
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed(
texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -452,7 +450,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@@ -504,7 +501,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@@ -547,7 +543,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
),
)
@@ -809,7 +804,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=8192,
func=embedding_func,
),
vector_storage="FaissVectorDBStorage",
@@ -1229,7 +1223,6 @@ LightRAG now integrates seamlessly with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
),
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",

View file

@@ -397,7 +397,6 @@ async def initialize_rag():
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
max_token_size=8192,
func=embedding_func
)
)
@@ -426,7 +425,6 @@ rag = LightRAG(
# Use Hugging Face embedding function
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed(
texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -455,7 +453,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@@ -507,7 +504,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@@ -550,7 +546,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
),
)
@@ -872,7 +867,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=8192,
func=embedding_func,
),
vector_storage="FaissVectorDBStorage",
@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
),
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",

View file

@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
- `MAX_ASYNC`: Maximum async operations
- `MAX_TOKENS`: Maximum token size
- `EMBEDDING_DIM`: Embedding dimensions
- `MAX_EMBED_TOKENS`: Maximum embedding token size
#### Security
- `LIGHTRAG_API_KEY`: API key for authentication

View file

@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
### Embedding Configuration (Should not be changed after the first file is processed)
####################################################################################
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
### see also env.ollama-binding-options.example for fine tuning ollama
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### OpenAI compatible
# EMBEDDING_BINDING=openai

View file

@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
# Inject chunk configuration
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
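The hunk above reads the embedding settings through `get_env_value`; for orientation, a rough sketch of what a helper with that call shape might look like (this is an assumption, not the actual implementation in the API config module):

```python
import os

def get_env_value(name: str, default, value_type=str):
    # Hypothetical sketch: return the environment variable coerced to
    # value_type, or the default when it is unset. The real helper may
    # treat booleans, empty strings, and lists differently.
    raw = os.getenv(name)
    if raw is None:
        return default
    return value_type(raw)
```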

View file

@@ -273,7 +273,6 @@ def create_app(args):
embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim,
max_token_size=args.max_embed_tokens,
func=lambda texts: lollms_embed(
texts,
embed_model=args.embedding_model,
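After this change, the lollms branch shown above builds its wrapper from the configured dimension alone; roughly (a sketch, with the keyword arguments that fall outside the truncated hunk elided):

```python
embedding_func = EmbeddingFunc(
    embedding_dim=args.embedding_dim,
    func=lambda texts: lollms_embed(
        texts,
        embed_model=args.embedding_model,
        # ...remaining keyword arguments continue as in the hunk above
    ),
)
```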

View file

@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_embed_tokens}")
ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")

View file

@@ -58,7 +58,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=OpenAIEmbedding(
@@ -114,7 +113,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=LiteLLMEmbedding(
@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
# Model Configuration
LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
```
### Key Differences

View file

@@ -121,7 +121,7 @@ async def azure_openai_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
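The same pattern applies to every provider module below: the decorator now declares only the embedding dimension. A hedged sketch of how a custom provider would use it, assuming (as the name suggests) that `wrap_embedding_func_with_attrs` wraps the function into an `EmbeddingFunc` carrying those attributes:

```python
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs

@wrap_embedding_func_with_attrs(embedding_dim=1536)
async def my_provider_embed(texts: list[str]) -> np.ndarray:
    # Hypothetical custom backend: one 1536-dimensional vector per input text.
    return np.zeros((len(texts), 1536))
```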

View file

@@ -110,7 +110,7 @@ async def bedrock_complete(
return result
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
# @wrap_embedding_func_with_attrs(embedding_dim=1024)
# @retry(
# stop=stop_after_attempt(3),
# wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
return data_list
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@@ -170,7 +170,7 @@ async def llama_index_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@@ -33,7 +33,7 @@ from lightrag.utils import (
import numpy as np
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
texts: list[str],
model: str = "netease-youdao/bce-embedding-base_v1",
base_url: str = "https://api.siliconflow.cn/v1/embeddings",
max_token_size: int = 512,
max_token_size: int = 8192,
api_key: str = None,
) -> np.ndarray:
if api_key and not api_key.startswith("Bearer "):
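With the default raised to 8192, typical chunk sizes no longer require overriding the limit at the call site. A hedged usage sketch (the import path and API key are assumptions/placeholders):

```python
import asyncio
from lightrag.llm.siliconcloud import siliconcloud_embedding  # import path is an assumption

async def main():
    vectors = await siliconcloud_embedding(
        ["LightRAG embeds each chunk before indexing."],
        api_key="sk-...",  # placeholder
        # max_token_size now defaults to 8192, so it can usually be omitted
    )
    print(vectors.shape)

asyncio.run(main())
```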

View file

@@ -167,7 +167,7 @@ async def zhipu_complete(
)
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1024)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
@dataclass
class EmbeddingFunc:
embedding_dim: int
max_token_size: int
func: callable
# concurrent_limit: int = 16
max_token_size: int | None = None # deprecated; kept for backward compatibility only
async def __call__(self, *args, **kwargs) -> np.ndarray:
return await self.func(*args, **kwargs)
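Because the field is kept with a `None` default, existing call sites that still pass `max_token_size` keep working; the value is simply ignored. A minimal sketch of both construction styles and of awaiting the wrapper (`my_embed` is a placeholder):

```python
import asyncio
import numpy as np
from lightrag.utils import EmbeddingFunc

async def my_embed(texts: list[str]) -> np.ndarray:
    # Placeholder embedding callable.
    return np.zeros((len(texts), 768))

new_style = EmbeddingFunc(embedding_dim=768, func=my_embed)
legacy = EmbeddingFunc(embedding_dim=768, func=my_embed, max_token_size=8192)  # accepted, unused

# The wrapper is awaited exactly like the underlying function.
vectors = asyncio.run(new_style(["hello world"]))
print(vectors.shape)  # (1, 768)
```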

View file

@@ -67,9 +67,7 @@ async def initialize_rag():
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
)
await rag.initialize_storages()

View file

@@ -92,9 +92,7 @@ if __name__ == "__main__":
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
)
query_param = QueryParam(mode=mode)
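For context, examples of this shape typically go on to run the query with that parameter object; a hedged sketch of the usual follow-up (the query text is illustrative):

```python
response = rag.query(
    "What are the top themes in this document?",
    param=query_param,
)
print(response)
```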