refactor: Remove deprecated max_token_size from embedding configuration

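The `max_token_size` parameter is no longer used by the embedding wrapper. Removing it from the examples, documentation, environment settings, and binding decorators simplifies the API and makes clear that token-length management is handled by the upstream text-chunking logic rather than by the embedding wrapper. `EmbeddingFunc.max_token_size` remains as an optional field (default `None`) solely for backward compatibility with existing callers.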
This commit is contained in:
yangdx 2025-07-29 10:49:35 +08:00
parent d26d413d97
commit 9923821d75
19 changed files with 13 additions and 40 deletions

View file

@ -396,7 +396,6 @@ async def initialize_rag():
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=4096, embedding_dim=4096,
max_token_size=8192,
func=embedding_func func=embedding_func
) )
) )
@ -425,7 +424,6 @@ rag = LightRAG(
# 使用Hugging Face嵌入函数 # 使用Hugging Face嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed( func=lambda texts: hf_embed(
texts, texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -452,7 +450,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数 # 使用Ollama嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -504,7 +501,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数 # 使用Ollama嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -547,7 +543,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数 llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数
embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数 embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model) func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
), ),
) )
@ -809,7 +804,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=8192,
func=embedding_func, func=embedding_func,
), ),
vector_storage="FaissVectorDBStorage", vector_storage="FaissVectorDBStorage",
@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
), ),
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=3072, embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed( func=lambda texts: openai_embed(
texts, texts,
model="text-embedding-3-large", model="text-embedding-3-large",

View file

@ -397,7 +397,6 @@ async def initialize_rag():
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=4096, embedding_dim=4096,
max_token_size=8192,
func=embedding_func func=embedding_func
) )
) )
@ -426,7 +425,6 @@ rag = LightRAG(
# Use Hugging Face embedding function # Use Hugging Face embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed( func=lambda texts: hf_embed(
texts, texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -455,7 +453,6 @@ rag = LightRAG(
# Use Ollama embedding function # Use Ollama embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -507,7 +504,6 @@ rag = LightRAG(
# Use Ollama embedding function # Use Ollama embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -550,7 +546,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model) func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
), ),
) )
@ -872,7 +867,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=8192,
func=embedding_func, func=embedding_func,
), ),
vector_storage="FaissVectorDBStorage", vector_storage="FaissVectorDBStorage",
@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
), ),
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=3072, embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed( func=lambda texts: openai_embed(
texts, texts,
model="text-embedding-3-large", model="text-embedding-3-large",

View file

@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
- `MAX_ASYNC`: Maximum async operations - `MAX_ASYNC`: Maximum async operations
- `MAX_TOKENS`: Maximum token size - `MAX_TOKENS`: Maximum token size
- `EMBEDDING_DIM`: Embedding dimensions - `EMBEDDING_DIM`: Embedding dimensions
- `MAX_EMBED_TOKENS`: Maximum embedding token size
#### Security #### Security
- `LIGHTRAG_API_KEY`: API key for authentication - `LIGHTRAG_API_KEY`: API key for authentication

View file

@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
### Embedding Configuration (Should not be changed after the first file processed) ### Embedding Configuration (Should not be changed after the first file processed)
#################################################################################### ####################################################################################
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
### see also env.ollama-binding-options.example for fine tuning ollama
EMBEDDING_BINDING=ollama EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024 EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_BINDING_HOST=http://localhost:11434
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### OpenAI compatible ### OpenAI compatible
# EMBEDDING_BINDING=openai # EMBEDDING_BINDING=openai

View file

@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest") args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest") args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int) args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
# Inject chunk configuration # Inject chunk configuration
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int) args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)

View file

@ -273,7 +273,6 @@ def create_app(args):
embedding_func = EmbeddingFunc( embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim, embedding_dim=args.embedding_dim,
max_token_size=args.max_embed_tokens,
func=lambda texts: lollms_embed( func=lambda texts: lollms_embed(
texts, texts,
embed_model=args.embedding_model, embed_model=args.embedding_model,

View file

@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.summary_language}") ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="") ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}") ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_embed_tokens}")
ASCIIColors.white(" ├─ Chunk Size: ", end="") ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}") ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="") ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")

View file

@ -58,7 +58,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed( func=lambda texts: llama_index_embed(
texts, texts,
embed_model=OpenAIEmbedding( embed_model=OpenAIEmbedding(
@ -114,7 +113,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed( func=lambda texts: llama_index_embed(
texts, texts,
embed_model=LiteLLMEmbedding( embed_model=LiteLLMEmbedding(
@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
# Model Configuration # Model Configuration
LLM_MODEL=gpt-4 LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
``` ```
### Key Differences ### Key Differences

View file

@ -121,7 +121,7 @@ async def azure_openai_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -110,7 +110,7 @@ async def bedrock_complete(
return result return result
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) # @wrap_embedding_func_with_attrs(embedding_dim=1024)
# @retry( # @retry(
# stop=stop_after_attempt(3), # stop=stop_after_attempt(3),
# wait=wait_exponential(multiplier=1, min=4, max=10), # wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
return data_list return data_list
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -170,7 +170,7 @@ async def llama_index_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -33,7 +33,7 @@ from lightrag.utils import (
import numpy as np import numpy as np
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512) @wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -432,7 +432,7 @@ async def nvidia_openai_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -40,7 +40,7 @@ async def siliconcloud_embedding(
texts: list[str], texts: list[str],
model: str = "netease-youdao/bce-embedding-base_v1", model: str = "netease-youdao/bce-embedding-base_v1",
base_url: str = "https://api.siliconflow.cn/v1/embeddings", base_url: str = "https://api.siliconflow.cn/v1/embeddings",
max_token_size: int = 512, max_token_size: int = 8192,
api_key: str = None, api_key: str = None,
) -> np.ndarray: ) -> np.ndarray:
if api_key and not api_key.startswith("Bearer "): if api_key and not api_key.startswith("Bearer "):

View file

@ -167,7 +167,7 @@ async def zhipu_complete(
) )
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1024)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -237,9 +237,8 @@ class UnlimitedSemaphore:
@dataclass @dataclass
class EmbeddingFunc: class EmbeddingFunc:
embedding_dim: int embedding_dim: int
max_token_size: int
func: callable func: callable
# concurrent_limit: int = 16 max_token_size: int | None = None # deprecated; kept only for backward compatibility
async def __call__(self, *args, **kwargs) -> np.ndarray: async def __call__(self, *args, **kwargs) -> np.ndarray:
return await self.func(*args, **kwargs) return await self.func(*args, **kwargs)

View file

@ -67,9 +67,7 @@ async def initialize_rag():
rag = LightRAG( rag = LightRAG(
working_dir=WORKING_DIR, working_dir=WORKING_DIR,
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
) )
await rag.initialize_storages() await rag.initialize_storages()

View file

@ -92,9 +92,7 @@ if __name__ == "__main__":
rag = LightRAG( rag = LightRAG(
working_dir=WORKING_DIR, working_dir=WORKING_DIR,
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
) )
query_param = QueryParam(mode=mode) query_param = QueryParam(mode=mode)