refactor: Remove deprecated max_token_size from embedding configuration
This parameter is no longer used. Its removal simplifies the API and clarifies that token length management is handled by upstream text chunking logic rather than the embedding wrapper.
Parent: d26d413d97
Commit: 9923821d75
19 changed files with 13 additions and 40 deletions
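For callers, the migration is a one-line deletion: stop passing max_token_size when constructing EmbeddingFunc. A minimal sketch of the before/after usage, assuming the usual `from lightrag.utils import EmbeddingFunc` import path; the my_embed stub and its 1024-dimension output are illustrative only, not part of this commit:

    import numpy as np

    from lightrag.utils import EmbeddingFunc

    async def my_embed(texts: list[str]) -> np.ndarray:
        # Hypothetical stand-in for a real embedding callable
        # (e.g. openai_embed or ollama_embed): one vector per input text.
        return np.zeros((len(texts), 1024))

    # Before: EmbeddingFunc(embedding_dim=1024, max_token_size=8192, func=my_embed)
    # After:  the wrapper needs only the dimension and the callable; keeping each
    # chunk within the model's token limit is the upstream chunker's job.
    embedding_func = EmbeddingFunc(embedding_dim=1024, func=my_embed)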
@@ -396,7 +396,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )

@@ -425,7 +424,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),

@@ -452,7 +450,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"

@@ -504,7 +501,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"

@@ -547,7 +543,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )

@@ -809,7 +804,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",

@@ -1229,7 +1223,6 @@ LightRAG now integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
@@ -397,7 +397,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )

@@ -426,7 +425,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),

@@ -455,7 +453,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"

@@ -507,7 +504,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"

@@ -550,7 +546,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )

@@ -872,7 +867,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",

@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
         ),
         embedding_func=EmbeddingFunc(
             embedding_dim=3072,
-            max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
                 model="text-embedding-3-large",
@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
 - `MAX_ASYNC`: Maximum async operations
 - `MAX_TOKENS`: Maximum token size
 - `EMBEDDING_DIM`: Embedding dimensions
-- `MAX_EMBED_TOKENS`: Maximum embedding token size

 #### Security
 - `LIGHTRAG_API_KEY`: API key for authentication
@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
 ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+
+### see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Maximum tokens sent to Embedding for each chunk (no longer in use?)
-# MAX_EMBED_TOKENS=8192

 ### OpenAI compatible
 # EMBEDDING_BINDING=openai
@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
     args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
     args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
-    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)

     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
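With the get_env_value("MAX_EMBED_TOKENS", ...) read removed here, a MAX_EMBED_TOKENS entry lingering in an existing .env no longer reaches the server's argument object; as far as this diff shows, it is now simply ignored.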
@@ -273,7 +273,6 @@ def create_app(args):

     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        max_token_size=args.max_embed_tokens,
         func=lambda texts: lollms_embed(
             texts,
             embed_model=args.embedding_model,
@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.summary_language}")
     ASCIIColors.white("    ├─ Max Parallel Insert: ", end="")
     ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white("    ├─ Max Embed Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white("    ├─ Chunk Size: ", end="")
     ASCIIColors.yellow(f"{args.chunk_size}")
     ASCIIColors.white("    ├─ Chunk Overlap Size: ", end="")
@@ -58,7 +58,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=OpenAIEmbedding(

@@ -114,7 +113,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=LiteLLMEmbedding(

@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
 # Model Configuration
 LLM_MODEL=gpt-4
 EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_MAX_TOKEN_SIZE=8192
 ```

 ### Key Differences
@@ -121,7 +121,7 @@ async def azure_openai_complete(
     return result


-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),

@@ -110,7 +110,7 @@ async def bedrock_complete(
     return result


-# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @wrap_embedding_func_with_attrs(embedding_dim=1024)
 # @retry(
 #     stop=stop_after_attempt(3),
 #     wait=wait_exponential(multiplier=1, min=4, max=10),

@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
     return data_list


-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),

@@ -170,7 +170,7 @@ async def llama_index_complete(
     return result


-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),

@@ -33,7 +33,7 @@ from lightrag.utils import (
 import numpy as np


-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),

@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
     return result


-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),

@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
     texts: list[str],
     model: str = "netease-youdao/bce-embedding-base_v1",
     base_url: str = "https://api.siliconflow.cn/v1/embeddings",
-    max_token_size: int = 512,
+    max_token_size: int = 8192,
     api_key: str = None,
 ) -> np.ndarray:
     if api_key and not api_key.startswith("Bearer "):

@@ -167,7 +167,7 @@ async def zhipu_complete(
     )


-@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1024)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
 @dataclass
 class EmbeddingFunc:
     embedding_dim: int
-    max_token_size: int
     func: callable
-    # concurrent_limit: int = 16
+    max_token_size: int | None = None  # deprecated, kept only for backward compatibility

     async def __call__(self, *args, **kwargs) -> np.ndarray:
         return await self.func(*args, **kwargs)
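Because max_token_size survives as a trailing optional field, keyword-style call sites that still pass it construct successfully; the value is simply never consulted. One inference from the field reordering above (not stated in the diff): positional construction that relied on the old (embedding_dim, max_token_size, func) order would now bind the token count to func, so keyword arguments are the safe form. A small sketch, reusing the hypothetical my_embed from the first example:

    # Still accepted, since the field is retained for compatibility, but inert:
    legacy = EmbeddingFunc(embedding_dim=1024, max_token_size=8192, func=my_embed)

    # Preferred going forward:
    current = EmbeddingFunc(embedding_dim=1024, func=my_embed)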
@@ -67,9 +67,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )

     await rag.initialize_storages()

@@ -92,9 +92,7 @@ if __name__ == "__main__":
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
     query_param = QueryParam(mode=mode)