Merge pull request #1875 from danielaskdd/remove-embedding-max-token-size

refactor: Remove deprecated `max_token_size` from embedding configuration
This commit is contained in:
Daniel.y 2025-07-29 11:23:54 +08:00 committed by GitHub
commit 7a5df185a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 13 additions and 40 deletions

View file

@ -396,7 +396,6 @@ async def initialize_rag():
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=4096, embedding_dim=4096,
max_token_size=8192,
func=embedding_func func=embedding_func
) )
) )
@ -425,7 +424,6 @@ rag = LightRAG(
# 使用Hugging Face嵌入函数 # 使用Hugging Face嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed( func=lambda texts: hf_embed(
texts, texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -452,7 +450,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数 # 使用Ollama嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -504,7 +501,6 @@ rag = LightRAG(
# 使用Ollama嵌入函数 # 使用Ollama嵌入函数
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -547,7 +543,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数 llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数
embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数 embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model) func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
), ),
) )
@ -809,7 +804,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=8192,
func=embedding_func, func=embedding_func,
), ),
vector_storage="FaissVectorDBStorage", vector_storage="FaissVectorDBStorage",
@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
), ),
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=3072, embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed( func=lambda texts: openai_embed(
texts, texts,
model="text-embedding-3-large", model="text-embedding-3-large",

View file

@ -397,7 +397,6 @@ async def initialize_rag():
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=4096, embedding_dim=4096,
max_token_size=8192,
func=embedding_func func=embedding_func
) )
) )
@ -426,7 +425,6 @@ rag = LightRAG(
# Use Hugging Face embedding function # Use Hugging Face embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed( func=lambda texts: hf_embed(
texts, texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@ -455,7 +453,6 @@ rag = LightRAG(
# Use Ollama embedding function # Use Ollama embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -507,7 +504,6 @@ rag = LightRAG(
# Use Ollama embedding function # Use Ollama embedding function
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=768, embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embed( func=lambda texts: ollama_embed(
texts, texts,
embed_model="nomic-embed-text" embed_model="nomic-embed-text"
@ -550,7 +546,6 @@ async def initialize_rag():
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model) func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
), ),
) )
@ -872,7 +867,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=384, embedding_dim=384,
max_token_size=8192,
func=embedding_func, func=embedding_func,
), ),
vector_storage="FaissVectorDBStorage", vector_storage="FaissVectorDBStorage",
@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
), ),
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=3072, embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed( func=lambda texts: openai_embed(
texts, texts,
model="text-embedding-3-large", model="text-embedding-3-large",

View file

@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
- `MAX_ASYNC`: Maximum async operations - `MAX_ASYNC`: Maximum async operations
- `MAX_TOKENS`: Maximum token size - `MAX_TOKENS`: Maximum token size
- `EMBEDDING_DIM`: Embedding dimensions - `EMBEDDING_DIM`: Embedding dimensions
- `MAX_EMBED_TOKENS`: Maximum embedding token size
#### Security #### Security
- `LIGHTRAG_API_KEY`: API key for authentication - `LIGHTRAG_API_KEY`: API key for authentication

View file

@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
### Embedding Configuration (Should not be changed after the first file processed) ### Embedding Configuration (Should not be changed after the first file processed)
#################################################################################### ####################################################################################
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
### see also env.ollama-binding-options.example for fine tuning ollama
EMBEDDING_BINDING=ollama EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024 EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_BINDING_HOST=http://localhost:11434
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### OpenAI compatible ### OpenAI compatible
# EMBEDDING_BINDING=openai # EMBEDDING_BINDING=openai

View file

@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest") args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest") args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int) args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
# Inject chunk configuration # Inject chunk configuration
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int) args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)

View file

@ -273,7 +273,6 @@ def create_app(args):
embedding_func = EmbeddingFunc( embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim, embedding_dim=args.embedding_dim,
max_token_size=args.max_embed_tokens,
func=lambda texts: lollms_embed( func=lambda texts: lollms_embed(
texts, texts,
embed_model=args.embedding_model, embed_model=args.embedding_model,

View file

@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.summary_language}") ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="") ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}") ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_embed_tokens}")
ASCIIColors.white(" ├─ Chunk Size: ", end="") ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}") ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="") ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")

View file

@ -58,7 +58,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed( func=lambda texts: llama_index_embed(
texts, texts,
embed_model=OpenAIEmbedding( embed_model=OpenAIEmbedding(
@ -114,7 +113,6 @@ rag = LightRAG(
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(
embedding_dim=1536, embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed( func=lambda texts: llama_index_embed(
texts, texts,
embed_model=LiteLLMEmbedding( embed_model=LiteLLMEmbedding(
@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
# Model Configuration # Model Configuration
LLM_MODEL=gpt-4 LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
``` ```
### Key Differences ### Key Differences

View file

@ -121,7 +121,7 @@ async def azure_openai_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -110,7 +110,7 @@ async def bedrock_complete(
return result return result
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) # @wrap_embedding_func_with_attrs(embedding_dim=1024)
# @retry( # @retry(
# stop=stop_after_attempt(3), # stop=stop_after_attempt(3),
# wait=wait_exponential(multiplier=1, min=4, max=10), # wait=wait_exponential(multiplier=1, min=4, max=10),

View file

@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
return data_list return data_list
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -170,7 +170,7 @@ async def llama_index_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -33,7 +33,7 @@ from lightrag.utils import (
import numpy as np import numpy as np
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512) @wrap_embedding_func_with_attrs(embedding_dim=2048)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -432,7 +432,7 @@ async def nvidia_openai_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -40,7 +40,7 @@ async def siliconcloud_embedding(
texts: list[str], texts: list[str],
model: str = "netease-youdao/bce-embedding-base_v1", model: str = "netease-youdao/bce-embedding-base_v1",
base_url: str = "https://api.siliconflow.cn/v1/embeddings", base_url: str = "https://api.siliconflow.cn/v1/embeddings",
max_token_size: int = 512, max_token_size: int = 8192,
api_key: str = None, api_key: str = None,
) -> np.ndarray: ) -> np.ndarray:
if api_key and not api_key.startswith("Bearer "): if api_key and not api_key.startswith("Bearer "):

View file

@ -167,7 +167,7 @@ async def zhipu_complete(
) )
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1024)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60), wait=wait_exponential(multiplier=1, min=4, max=60),

View file

@ -237,9 +237,8 @@ class UnlimitedSemaphore:
@dataclass @dataclass
class EmbeddingFunc: class EmbeddingFunc:
embedding_dim: int embedding_dim: int
max_token_size: int
func: callable func: callable
# concurrent_limit: int = 16 max_token_size: int | None = None # deprecated keep it for compatible only
async def __call__(self, *args, **kwargs) -> np.ndarray: async def __call__(self, *args, **kwargs) -> np.ndarray:
return await self.func(*args, **kwargs) return await self.func(*args, **kwargs)

View file

@ -67,9 +67,7 @@ async def initialize_rag():
rag = LightRAG( rag = LightRAG(
working_dir=WORKING_DIR, working_dir=WORKING_DIR,
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
) )
await rag.initialize_storages() await rag.initialize_storages()

View file

@ -92,9 +92,7 @@ if __name__ == "__main__":
rag = LightRAG( rag = LightRAG(
working_dir=WORKING_DIR, working_dir=WORKING_DIR,
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
embedding_dim=4096, max_token_size=8192, func=embedding_func
),
) )
query_param = QueryParam(mode=mode) query_param = QueryParam(mode=mode)