Merge pull request #1875 from danielaskdd/remove-embedding-max-token-size
refactor: Remove deprecated `max_token_size` from embedding configuration
This commit is contained in:
commit
7a5df185a5
19 changed files with 13 additions and 40 deletions
|
|
@ -396,7 +396,6 @@ async def initialize_rag():
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=4096,
|
embedding_dim=4096,
|
||||||
max_token_size=8192,
|
|
||||||
func=embedding_func
|
func=embedding_func
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -425,7 +424,6 @@ rag = LightRAG(
|
||||||
# 使用Hugging Face嵌入函数
|
# 使用Hugging Face嵌入函数
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=384,
|
embedding_dim=384,
|
||||||
max_token_size=5000,
|
|
||||||
func=lambda texts: hf_embed(
|
func=lambda texts: hf_embed(
|
||||||
texts,
|
texts,
|
||||||
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
|
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
|
||||||
|
|
@ -452,7 +450,6 @@ rag = LightRAG(
|
||||||
# 使用Ollama嵌入函数
|
# 使用Ollama嵌入函数
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=768,
|
embedding_dim=768,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: ollama_embed(
|
func=lambda texts: ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model="nomic-embed-text"
|
embed_model="nomic-embed-text"
|
||||||
|
|
@ -504,7 +501,6 @@ rag = LightRAG(
|
||||||
# 使用Ollama嵌入函数
|
# 使用Ollama嵌入函数
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=768,
|
embedding_dim=768,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: ollama_embed(
|
func=lambda texts: ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model="nomic-embed-text"
|
embed_model="nomic-embed-text"
|
||||||
|
|
@ -547,7 +543,6 @@ async def initialize_rag():
|
||||||
llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数
|
llm_model_func=llama_index_complete_if_cache, # LlamaIndex兼容的完成函数
|
||||||
embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数
|
embedding_func=EmbeddingFunc( # LlamaIndex兼容的嵌入函数
|
||||||
embedding_dim=1536,
|
embedding_dim=1536,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
|
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
@ -809,7 +804,6 @@ rag = LightRAG(
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=384,
|
embedding_dim=384,
|
||||||
max_token_size=8192,
|
|
||||||
func=embedding_func,
|
func=embedding_func,
|
||||||
),
|
),
|
||||||
vector_storage="FaissVectorDBStorage",
|
vector_storage="FaissVectorDBStorage",
|
||||||
|
|
@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
||||||
),
|
),
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=3072,
|
embedding_dim=3072,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: openai_embed(
|
func=lambda texts: openai_embed(
|
||||||
texts,
|
texts,
|
||||||
model="text-embedding-3-large",
|
model="text-embedding-3-large",
|
||||||
|
|
|
||||||
|
|
@ -397,7 +397,6 @@ async def initialize_rag():
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=4096,
|
embedding_dim=4096,
|
||||||
max_token_size=8192,
|
|
||||||
func=embedding_func
|
func=embedding_func
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -426,7 +425,6 @@ rag = LightRAG(
|
||||||
# Use Hugging Face embedding function
|
# Use Hugging Face embedding function
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=384,
|
embedding_dim=384,
|
||||||
max_token_size=5000,
|
|
||||||
func=lambda texts: hf_embed(
|
func=lambda texts: hf_embed(
|
||||||
texts,
|
texts,
|
||||||
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
|
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
|
||||||
|
|
@ -455,7 +453,6 @@ rag = LightRAG(
|
||||||
# Use Ollama embedding function
|
# Use Ollama embedding function
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=768,
|
embedding_dim=768,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: ollama_embed(
|
func=lambda texts: ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model="nomic-embed-text"
|
embed_model="nomic-embed-text"
|
||||||
|
|
@ -507,7 +504,6 @@ rag = LightRAG(
|
||||||
# Use Ollama embedding function
|
# Use Ollama embedding function
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=768,
|
embedding_dim=768,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: ollama_embed(
|
func=lambda texts: ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model="nomic-embed-text"
|
embed_model="nomic-embed-text"
|
||||||
|
|
@ -550,7 +546,6 @@ async def initialize_rag():
|
||||||
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
|
llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function
|
||||||
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
|
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
|
||||||
embedding_dim=1536,
|
embedding_dim=1536,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
|
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
@ -872,7 +867,6 @@ rag = LightRAG(
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=384,
|
embedding_dim=384,
|
||||||
max_token_size=8192,
|
|
||||||
func=embedding_func,
|
func=embedding_func,
|
||||||
),
|
),
|
||||||
vector_storage="FaissVectorDBStorage",
|
vector_storage="FaissVectorDBStorage",
|
||||||
|
|
@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
||||||
),
|
),
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=3072,
|
embedding_dim=3072,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: openai_embed(
|
func=lambda texts: openai_embed(
|
||||||
texts,
|
texts,
|
||||||
model="text-embedding-3-large",
|
model="text-embedding-3-large",
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
|
||||||
- `MAX_ASYNC`: Maximum async operations
|
- `MAX_ASYNC`: Maximum async operations
|
||||||
- `MAX_TOKENS`: Maximum token size
|
- `MAX_TOKENS`: Maximum token size
|
||||||
- `EMBEDDING_DIM`: Embedding dimensions
|
- `EMBEDDING_DIM`: Embedding dimensions
|
||||||
- `MAX_EMBED_TOKENS`: Maximum embedding token size
|
|
||||||
|
|
||||||
#### Security
|
#### Security
|
||||||
- `LIGHTRAG_API_KEY`: API key for authentication
|
- `LIGHTRAG_API_KEY`: API key for authentication
|
||||||
|
|
|
||||||
|
|
@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
|
||||||
### Embedding Configuration (Should not be changed after the first file processed)
|
### Embedding Configuration (Should not be changed after the first file processed)
|
||||||
####################################################################################
|
####################################################################################
|
||||||
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
|
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
|
||||||
|
|
||||||
|
### see also env.ollama-binding-options.example for fine tuning ollama
|
||||||
EMBEDDING_BINDING=ollama
|
EMBEDDING_BINDING=ollama
|
||||||
EMBEDDING_MODEL=bge-m3:latest
|
EMBEDDING_MODEL=bge-m3:latest
|
||||||
EMBEDDING_DIM=1024
|
EMBEDDING_DIM=1024
|
||||||
EMBEDDING_BINDING_API_KEY=your_api_key
|
EMBEDDING_BINDING_API_KEY=your_api_key
|
||||||
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
||||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||||
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
|
|
||||||
# MAX_EMBED_TOKENS=8192
|
|
||||||
|
|
||||||
### OpenAI compatible
|
### OpenAI compatible
|
||||||
# EMBEDDING_BINDING=openai
|
# EMBEDDING_BINDING=openai
|
||||||
|
|
|
||||||
|
|
@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
|
||||||
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
|
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
|
||||||
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
|
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
|
||||||
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
|
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
|
||||||
args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
|
|
||||||
|
|
||||||
# Inject chunk configuration
|
# Inject chunk configuration
|
||||||
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
||||||
|
|
|
||||||
|
|
@ -273,7 +273,6 @@ def create_app(args):
|
||||||
|
|
||||||
embedding_func = EmbeddingFunc(
|
embedding_func = EmbeddingFunc(
|
||||||
embedding_dim=args.embedding_dim,
|
embedding_dim=args.embedding_dim,
|
||||||
max_token_size=args.max_embed_tokens,
|
|
||||||
func=lambda texts: lollms_embed(
|
func=lambda texts: lollms_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model=args.embedding_model,
|
embed_model=args.embedding_model,
|
||||||
|
|
|
||||||
|
|
@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
||||||
ASCIIColors.yellow(f"{args.summary_language}")
|
ASCIIColors.yellow(f"{args.summary_language}")
|
||||||
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
|
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.max_parallel_insert}")
|
ASCIIColors.yellow(f"{args.max_parallel_insert}")
|
||||||
ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
|
|
||||||
ASCIIColors.yellow(f"{args.max_embed_tokens}")
|
|
||||||
ASCIIColors.white(" ├─ Chunk Size: ", end="")
|
ASCIIColors.white(" ├─ Chunk Size: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.chunk_size}")
|
ASCIIColors.yellow(f"{args.chunk_size}")
|
||||||
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
|
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,6 @@ rag = LightRAG(
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=1536,
|
embedding_dim=1536,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: llama_index_embed(
|
func=lambda texts: llama_index_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model=OpenAIEmbedding(
|
embed_model=OpenAIEmbedding(
|
||||||
|
|
@ -114,7 +113,6 @@ rag = LightRAG(
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(
|
||||||
embedding_dim=1536,
|
embedding_dim=1536,
|
||||||
max_token_size=8192,
|
|
||||||
func=lambda texts: llama_index_embed(
|
func=lambda texts: llama_index_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model=LiteLLMEmbedding(
|
embed_model=LiteLLMEmbedding(
|
||||||
|
|
@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
|
||||||
# Model Configuration
|
# Model Configuration
|
||||||
LLM_MODEL=gpt-4
|
LLM_MODEL=gpt-4
|
||||||
EMBEDDING_MODEL=text-embedding-3-large
|
EMBEDDING_MODEL=text-embedding-3-large
|
||||||
EMBEDDING_MAX_TOKEN_SIZE=8192
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Key Differences
|
### Key Differences
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ async def azure_openai_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,7 @@ async def bedrock_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
# @wrap_embedding_func_with_attrs(embedding_dim=1024)
|
||||||
# @retry(
|
# @retry(
|
||||||
# stop=stop_after_attempt(3),
|
# stop=stop_after_attempt(3),
|
||||||
# wait=wait_exponential(multiplier=1, min=4, max=10),
|
# wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
|
||||||
return data_list
|
return data_list
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
|
@wrap_embedding_func_with_attrs(embedding_dim=2048)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,7 @@ async def llama_index_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ from lightrag.utils import (
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
|
@wrap_embedding_func_with_attrs(embedding_dim=2048)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -432,7 +432,7 @@ async def nvidia_openai_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ async def siliconcloud_embedding(
|
||||||
texts: list[str],
|
texts: list[str],
|
||||||
model: str = "netease-youdao/bce-embedding-base_v1",
|
model: str = "netease-youdao/bce-embedding-base_v1",
|
||||||
base_url: str = "https://api.siliconflow.cn/v1/embeddings",
|
base_url: str = "https://api.siliconflow.cn/v1/embeddings",
|
||||||
max_token_size: int = 512,
|
max_token_size: int = 8192,
|
||||||
api_key: str = None,
|
api_key: str = None,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
if api_key and not api_key.startswith("Bearer "):
|
if api_key and not api_key.startswith("Bearer "):
|
||||||
|
|
|
||||||
|
|
@ -167,7 +167,7 @@ async def zhipu_complete(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
@wrap_embedding_func_with_attrs(embedding_dim=1024)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -237,9 +237,8 @@ class UnlimitedSemaphore:
|
||||||
@dataclass
|
@dataclass
|
||||||
class EmbeddingFunc:
|
class EmbeddingFunc:
|
||||||
embedding_dim: int
|
embedding_dim: int
|
||||||
max_token_size: int
|
|
||||||
func: callable
|
func: callable
|
||||||
# concurrent_limit: int = 16
|
max_token_size: int | None = None # deprecated; kept only for backward compatibility
|
||||||
|
|
||||||
async def __call__(self, *args, **kwargs) -> np.ndarray:
|
async def __call__(self, *args, **kwargs) -> np.ndarray:
|
||||||
return await self.func(*args, **kwargs)
|
return await self.func(*args, **kwargs)
|
||||||
|
|
|
||||||
|
|
@ -67,9 +67,7 @@ async def initialize_rag():
|
||||||
rag = LightRAG(
|
rag = LightRAG(
|
||||||
working_dir=WORKING_DIR,
|
working_dir=WORKING_DIR,
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
|
||||||
embedding_dim=4096, max_token_size=8192, func=embedding_func
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
await rag.initialize_storages()
|
await rag.initialize_storages()
|
||||||
|
|
|
||||||
|
|
@ -92,9 +92,7 @@ if __name__ == "__main__":
|
||||||
rag = LightRAG(
|
rag = LightRAG(
|
||||||
working_dir=WORKING_DIR,
|
working_dir=WORKING_DIR,
|
||||||
llm_model_func=llm_model_func,
|
llm_model_func=llm_model_func,
|
||||||
embedding_func=EmbeddingFunc(
|
embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
|
||||||
embedding_dim=4096, max_token_size=8192, func=embedding_func
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
query_param = QueryParam(mode=mode)
|
query_param = QueryParam(mode=mode)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue