diff --git a/README-zh.md b/README-zh.md
index 707c7ab5..42ab71f5 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -396,7 +396,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -425,7 +424,6 @@ rag = LightRAG(
     # 使用Hugging Face嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
        func=lambda texts: hf_embed(
            texts,
            tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -452,7 +450,6 @@ rag = LightRAG(
     # 使用Ollama嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -504,7 +501,6 @@ rag = LightRAG(
     # 使用Ollama嵌入函数
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -547,7 +543,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex兼容的完成函数
         embedding_func=EmbeddingFunc(  # LlamaIndex兼容的嵌入函数
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -809,7 +804,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1229,7 +1223,6 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
     ),
     embedding_func=EmbeddingFunc(
         embedding_dim=3072,
-        max_token_size=8192,
         func=lambda texts: openai_embed(
             texts,
             model="text-embedding-3-large",
diff --git a/README.md b/README.md
index 1af28d3b..74ac132c 100644
--- a/README.md
+++ b/README.md
@@ -397,7 +397,6 @@ async def initialize_rag():
         llm_model_func=llm_model_func,
         embedding_func=EmbeddingFunc(
             embedding_dim=4096,
-            max_token_size=8192,
             func=embedding_func
         )
     )
@@ -426,7 +425,6 @@ rag = LightRAG(
     # Use Hugging Face embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=5000,
         func=lambda texts: hf_embed(
             texts,
             tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
@@ -455,7 +453,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -507,7 +504,6 @@ rag = LightRAG(
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
-        max_token_size=8192,
         func=lambda texts: ollama_embed(
             texts,
             embed_model="nomic-embed-text"
@@ -550,7 +546,6 @@ async def initialize_rag():
         llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
         embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
             embedding_dim=1536,
-            max_token_size=8192,
             func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
         ),
     )
@@ -872,7 +867,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=384,
-        max_token_size=8192,
         func=embedding_func,
     ),
     vector_storage="FaissVectorDBStorage",
@@ -1278,7 +1272,6 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
     ),
     embedding_func=EmbeddingFunc(
         embedding_dim=3072,
-        max_token_size=8192,
         func=lambda texts: openai_embed(
             texts,
             model="text-embedding-3-large",
diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md
index e7955cf8..72d7da8e 100644
--- a/docs/DockerDeployment.md
+++ b/docs/DockerDeployment.md
@@ -84,7 +84,6 @@ LightRAG can be configured using environment variables in the `.env` file:
 - `MAX_ASYNC`: Maximum async operations
 - `MAX_TOKENS`: Maximum token size
 - `EMBEDDING_DIM`: Embedding dimensions
-- `MAX_EMBED_TOKENS`: Maximum embedding token size
 
 #### Security
 - `LIGHTRAG_API_KEY`: API key for authentication
diff --git a/env.example b/env.example
index 79cd1a13..850538fa 100644
--- a/env.example
+++ b/env.example
@@ -130,14 +130,14 @@ LLM_BINDING_API_KEY=your_api_key
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
 ### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+
+### See also env.ollama-binding-options.example for fine-tuning Ollama
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Maximum tokens sent to Embedding for each chunk (no longer in use?)
-# MAX_EMBED_TOKENS=8192
 
 ### OpenAI compatible
 # EMBEDDING_BINDING=openai
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index cf8c0492..befedc42 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -320,7 +320,6 @@ def parse_args() -> argparse.Namespace:
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
     args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
     args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
-    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)
 
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 8845e06c..5ac100a6 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -273,7 +273,6 @@ def create_app(args):
 
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        max_token_size=args.max_embed_tokens,
         func=lambda texts: lollms_embed(
             texts,
             embed_model=args.embedding_model,
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index ad5d13c9..90a1eb96 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -268,8 +268,6 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.summary_language}")
     ASCIIColors.white("    ├─ Max Parallel Insert: ", end="")
     ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white("    ├─ Max Embed Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white("    ├─ Chunk Size: ", end="")
     ASCIIColors.yellow(f"{args.chunk_size}")
     ASCIIColors.white("    ├─ Chunk Overlap Size: ", end="")
diff --git a/lightrag/llm/Readme.md b/lightrag/llm/Readme.md
index 969d70e3..c907fd4d 100644
--- a/lightrag/llm/Readme.md
+++ b/lightrag/llm/Readme.md
@@ -58,7 +58,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=OpenAIEmbedding(
@@ -114,7 +113,6 @@ rag = LightRAG(
     llm_model_func=llm_model_func,
     embedding_func=EmbeddingFunc(
         embedding_dim=1536,
-        max_token_size=8192,
         func=lambda texts: llama_index_embed(
             texts,
             embed_model=LiteLLMEmbedding(
@@ -143,7 +141,6 @@ LITELLM_KEY=your-litellm-key
 # Model Configuration
 LLM_MODEL=gpt-4
 EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_MAX_TOKEN_SIZE=8192
 ```
 
 ### Key Differences
diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index e2fcedbb..ecec0fcc 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -121,7 +121,7 @@ async def azure_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
diff --git a/lightrag/llm/bedrock.py b/lightrag/llm/bedrock.py
index d243983c..e1edc7ff 100644
--- a/lightrag/llm/bedrock.py
+++ b/lightrag/llm/bedrock.py
@@ -110,7 +110,7 @@ async def bedrock_complete(
     return result
 
 
-# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @wrap_embedding_func_with_attrs(embedding_dim=1024)
 # @retry(
 #     stop=stop_after_attempt(3),
 #     wait=wait_exponential(multiplier=1, min=4, max=10),
diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py
index 6a1e95d2..5a1b59fb 100644
--- a/lightrag/llm/jina.py
+++ b/lightrag/llm/jina.py
@@ -35,7 +35,7 @@ async def fetch_data(url, headers, data):
     return data_list
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py
index b8dc39f3..a88d830f 100644
--- a/lightrag/llm/llama_index_impl.py
+++ b/lightrag/llm/llama_index_impl.py
@@ -170,7 +170,7 @@ async def llama_index_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/nvidia_openai.py b/lightrag/llm/nvidia_openai.py
index e711c4ac..1cbab380 100644
--- a/lightrag/llm/nvidia_openai.py
+++ b/lightrag/llm/nvidia_openai.py
@@ -33,7 +33,7 @@ from lightrag.utils import (
 import numpy as np
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
+@wrap_embedding_func_with_attrs(embedding_dim=2048)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index ff10d2f5..cedd804d 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -432,7 +432,7 @@ async def nvidia_openai_complete(
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1536)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/llm/siliconcloud.py b/lightrag/llm/siliconcloud.py
index 41adb96e..fe8da0dd 100644
--- a/lightrag/llm/siliconcloud.py
+++ b/lightrag/llm/siliconcloud.py
@@ -40,7 +40,7 @@ async def siliconcloud_embedding(
     texts: list[str],
     model: str = "netease-youdao/bce-embedding-base_v1",
     base_url: str = "https://api.siliconflow.cn/v1/embeddings",
-    max_token_size: int = 512,
+    max_token_size: int = 8192,
     api_key: str = None,
 ) -> np.ndarray:
     if api_key and not api_key.startswith("Bearer "):
diff --git a/lightrag/llm/zhipu.py b/lightrag/llm/zhipu.py
index dede42dc..c9d1253e 100644
--- a/lightrag/llm/zhipu.py
+++ b/lightrag/llm/zhipu.py
@@ -167,7 +167,7 @@ async def zhipu_complete(
     )
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+@wrap_embedding_func_with_attrs(embedding_dim=1024)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 2fd67f0e..5f4c5a12 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -237,9 +237,8 @@ class UnlimitedSemaphore:
 @dataclass
 class EmbeddingFunc:
     embedding_dim: int
-    max_token_size: int
     func: callable
-    # concurrent_limit: int = 16
+    max_token_size: int | None = None  # deprecated, kept for backward compatibility only
 
     async def __call__(self, *args, **kwargs) -> np.ndarray:
         return await self.func(*args, **kwargs)
diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py
index 3b9944eb..8093a9ee 100644
--- a/reproduce/Step_1_openai_compatible.py
+++ b/reproduce/Step_1_openai_compatible.py
@@ -67,9 +67,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     await rag.initialize_storages()
diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py
index 56159ee1..d163cce9 100644
--- a/reproduce/Step_3_openai_compatible.py
+++ b/reproduce/Step_3_openai_compatible.py
@@ -92,9 +92,7 @@ if __name__ == "__main__":
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096, max_token_size=8192, func=embedding_func
-        ),
+        embedding_func=EmbeddingFunc(embedding_dim=4096, func=embedding_func),
     )
 
     query_param = QueryParam(mode=mode)
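
Note: after this patch, `EmbeddingFunc` takes only `embedding_dim` and `func`; `max_token_size` survives as an optional, ignored field so existing call sites keep working. A minimal sketch of both call styles, using the `openai_embed` wrapper that appears in the README hunks above:

```python
# Sketch of the post-patch API; EmbeddingFunc and openai_embed are the
# objects touched by this diff (lightrag.utils / lightrag.llm.openai).
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_embed

# New style: no max_token_size argument.
embedding_func = EmbeddingFunc(
    embedding_dim=3072,  # must match the embedding model's output width
    func=lambda texts: openai_embed(texts, model="text-embedding-3-large"),
)

# Old style still constructs without error: max_token_size is now an
# optional, deprecated field kept only for backward compatibility.
legacy_func = EmbeddingFunc(
    embedding_dim=3072,
    func=lambda texts: openai_embed(texts, model="text-embedding-3-large"),
    max_token_size=8192,  # accepted but ignored
)
```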