refactor: Remove deprecated max_token_size from embedding configuration

This parameter is no longer used. Its removal simplifies the API and clarifies that token length management is handled by upstream text chunking logic rather than the embedding wrapper.
This commit is contained in:
yangdx 2025-07-29 10:49:35 +08:00
parent d26d413d97
commit 9923821d75
19 changed files with 13 additions and 40 deletions

View file

@ -396,7 +396,6 @@ async def initialize_rag():
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
max_token_size=8192,
func=embedding_func
)
)
@ -425,7 +424,6 @@ rag = LightRAG(
# 使用Hugging Face嵌入函数
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed(
texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -452,7 +450,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@ -504,7 +501,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@ -547,7 +543,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数
embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
),
)
@ -809,7 +804,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=8192,
func=embedding_func,
),
vector_storage="FaissVectorDBStorage",
@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
),
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",

View file

@ -397,7 +397,6 @@ async def initialize_rag():
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
max_token_size=8192,
func=embedding_func
)
)
@ -426,7 +425,6 @@ rag = LightRAG(
# Use Hugging Face embedding function
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed(
texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -455,7 +453,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@ -507,7 +504,6 @@ rag = LightRAG(
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
@ -550,7 +546,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
),
)
@ -872,7 +867,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=8192,
func=embedding_func,
),
vector_storage="FaissVectorDBStorage",
@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
),
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",

View file

@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
- `MAX_ASYNC`: Maximum async operations
- `MAX_TOKENS`: Maximum token size
- `EMBEDDING_DIM`: Embedding dimensions
- `MAX_EMBED_TOKENS`: Maximum embedding token size
#### Security
- `LIGHTRAG_API_KEY`: API key for authentication

View file

@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
### Embedding Configuration (Should not be changed after the first file processed)
####################################################################################
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
### see also env.ollama-binding-options.example for fine tuning ollama
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### OpenAI compatible
# EMBEDDING_BINDING=openai

View file

@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
# Inject chunk configuration
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)

View file

@ -273,7 +273,6 @@ def create_app(args):
embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim,
max_token_size=args.max_embed_tokens,
func=lambda texts: lollms_embed(
texts,
embed_model=args.embedding_model,

View file

@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_embed_tokens}")
ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")

View file

@ -58,7 +58,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=OpenAIEmbedding(
@ -114,7 +113,6 @@ rag = LightRAG(
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=LiteLLMEmbedding(
@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
# Model Configuration
LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
```
### Key Differences

View file

@ -121,7 +121,7 @@ async def azure_openai_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -110,7 +110,7 @@ async def bedrock_complete(
return result
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
# @wrap_embedding_func_with_attrs(embedding_dim=1024)
# @retry(
# stop=stop_after_attempt(3),
# wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
return data_list
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -170,7 +170,7 @@ async def llama_index_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -33,7 +33,7 @@ from lightrag.utils import (
import numpy as np
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -432,7 +432,7 @@ async def nvidia_openai_complete(
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -40,7 +40,7 @@ async def siliconcloud_embedding(
texts: list[str],
model: str = "netease-youdao/bce-embedding-base_v1",
base_url: str = "https://api.siliconflow.cn/v1/embeddings",
max_token_size: int = 512,
max_token_size: int = 8192,
api_key: str = None,
) -> np.ndarray:
if api_key and not api_key.startswith("Bearer "):

View file

@ -167,7 +167,7 @@ async def zhipu_complete(
)
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
@wrap_embedding_func_with_attrs(embedding_dim=1024)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -237,9 +237,8 @@ class UnlimitedSemaphore:
@dataclass
class EmbeddingFunc:
embedding_dim: int
max_token_size: int
func: callable
# concurrent_limit: int = 16
max_token_size: int | None = None # deprecated keep it for compatible only
async def __call__(self, *args, **kwargs) -> np.ndarray:
return await self.func(*args, **kwargs)

View file

@ -67,9 +67,7 @@ async def initialize_rag():
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
)
await rag.initialize_storages()

View file

@ -92,9 +92,7 @@ if __name__ == "__main__":
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
)
query_param = QueryParam(mode=mode)