refactor: Remove deprecated max_token_size from embedding configuration
This parameter is no longer used. Its removal simplifies the API and clarifies that token length management is handled by upstream text chunking logic rather than the embedding wrapper.
Parent: d26d413d97
Commit: 9923821d75
19 changed files with 13 additions and 40 deletions
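
For illustration, here is a minimal before/after sketch of the affected constructor call. The embedding callable `my_embed` is a hypothetical stand-in; the `EmbeddingFunc` signature follows the examples updated in this commit:

```python
import numpy as np

from lightrag.utils import EmbeddingFunc


async def my_embed(texts: list[str]) -> np.ndarray:
    # Hypothetical embedding callable; a real deployment would call a model API here.
    return np.zeros((len(texts), 1024))


# Before: callers passed a token limit that the wrapper never enforced.
# embedding_func = EmbeddingFunc(embedding_dim=1024, max_token_size=8192, func=my_embed)

# After: only the dimension and the callable are needed; upstream chunking
# is responsible for keeping inputs within the model's token limit.
embedding_func = EmbeddingFunc(embedding_dim=1024, func=my_embed)
```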
```diff
@@ -396,7 +396,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -425,7 +424,6 @@ rag = LightRAG(
     # Use the Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -452,7 +450,6 @@ rag = LightRAG(
     # Use the Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -504,7 +501,6 @@ rag = LightRAG(
     # Use the Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -547,7 +543,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -809,7 +804,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1229,7 +1223,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
```
```diff
@@ -397,7 +397,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -426,7 +425,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -455,7 +453,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -507,7 +504,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -550,7 +546,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -872,7 +867,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
```
```diff
@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
 - `MAX_ASYNC`: Maximum async operations
 - `MAX_TOKENS`: Maximum token size
 - `EMBEDDING_DIM`: Embedding dimensions
-- `MAX_EMBED_TOKENS`: Maximum embedding token size
 
 #### Security
 - `LIGHTRAG_API_KEY`: API key for authentication
```
```diff
@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
 ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+
+### see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Maximum tokens sent to Embedding for each chunk (no longer in use?)
-# MAX_EMBED_TOKENS=8192
 
 ### OpenAI compatible
 # EMBEDDING_BINDING=openai
```
```diff
@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
     args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
    args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
-    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
 
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
```
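
For context, `get_env_value` reads a setting from the environment with a default and an optional type cast. The exact implementation lives in LightRAG's API config module; the sketch below is an illustrative assumption, not the project's code:

```python
import os
from typing import Any, Callable


def get_env_value(name: str, default: Any, cast: Callable[[str], Any] = str) -> Any:
    # Fall back to the default when the variable is unset; cast the raw string otherwise.
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default
```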
```diff
@@ -273,7 +273,6 @@ def create_app(args):
 
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        max_token_size=args.max_embed_tokens,
         func=lambda texts: lollms_embed(
             texts,
             embed_model=args.embedding_model,
```
```diff
@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.summary_language}")
     ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
     ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white(" ├─ Chunk Size: ", end="")
     ASCIIColors.yellow(f"{args.chunk_size}")
     ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
```
````diff
@@ -58,7 +58,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=OpenAIEmbedding(
@@ -114,7 +113,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=LiteLLMEmbedding(
@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
 # Model Configuration
 LLM_MODEL=gpt-4
 EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_MAX_TOKEN_SIZE=8192
 ```
 
 ### Key Differences
````
```diff
@@ -121,7 +121,7 @@ async def azure_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
```
```diff
@@ -110,7 +110,7 @@ async def bedrock_complete(
     return result
 
 
-# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @wrap_embedding_func_with_attrs(embedding_dim=1024)
 # @retry(
 #     stop=stop_after_attempt(3),
 #     wait=wait_exponential(multiplier=1, min=4, max=10),
```
```diff
@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
     return data_list
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -170,7 +170,7 @@ async def llama_index_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -33,7 +33,7 @@ from lightrag.utils import (
 import numpy as np
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
     texts: list[str],
     model: str = "netease-youdao/bce-embedding-base_v1",
     base_url: str = "https://api.siliconflow.cn/v1/embeddings",
-    max_token_size: int = 512,
+    max_token_size: int = 8192,
     api_key: str = None,
 ) -> np.ndarray:
     if api_key and not api_key.startswith("Bearer "):
```
```diff
@@ -167,7 +167,7 @@ async def zhipu_complete(
     )
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1024)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
```
```diff
@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
 @dataclass
 class EmbeddingFunc:
     embedding_dim: int
-    max_token_size: int
     func: callable
-    # concurrent_limit: int = 16
+    max_token_size: int | None = None  # deprecated keep it for compatible only
 
     async def __call__(self, *args, **kwargs) -> np.ndarray:
         return await self.func(*args, **kwargs)
```
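
Keeping the field as an optional no-op preserves backward compatibility: constructing `EmbeddingFunc` with `max_token_size` still works, and the value is simply stored and ignored. A minimal sketch, with `my_embed` as a stand-in callable:

```python
import asyncio

import numpy as np

from lightrag.utils import EmbeddingFunc


async def my_embed(texts: list[str]) -> np.ndarray:
    # Stand-in callable returning fixed-size vectors.
    return np.zeros((len(texts), 8))


# Legacy call sites keep working; max_token_size is retained but no longer consulted.
legacy = EmbeddingFunc(embedding_dim=8, func=my_embed, max_token_size=8192)
current = EmbeddingFunc(embedding_dim=8, func=my_embed)

print(asyncio.run(legacy(["hello"])).shape)   # (1, 8)
print(asyncio.run(current(["hello"])).shape)  # (1, 8)
```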
```diff
@@ -67,9 +67,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     await rag.initialize_storages()
```
```diff
@@ -92,9 +92,7 @@ if __name__ == "__main__":
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
     query_param = QueryParam(mode=mode)
 
```