From 14a6c24ed75abc45f7304d4e8368c59a50fe4684 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Fri, 14 Nov 2025 19:28:36 +0800
Subject: [PATCH] Add configurable embedding token limit with validation

- Add EMBEDDING_TOKEN_LIMIT env var
- Set max_token_size on embedding func
- Add token limit property to LightRAG
- Validate summary length vs limit
- Log warning when limit exceeded
---
 lightrag/api/config.py          |  5 +++++
 lightrag/api/lightrag_server.py |  5 +++++
 lightrag/lightrag.py            |  7 +++++++
 lightrag/operate.py             | 14 ++++++++++++++
 4 files changed, 31 insertions(+)

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 95ab9f70..4f59d3c1 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -445,6 +445,11 @@ def parse_args() -> argparse.Namespace:
         "EMBEDDING_BATCH_NUM", DEFAULT_EMBEDDING_BATCH_NUM, int
     )
 
+    # Embedding token limit configuration
+    args.embedding_token_limit = get_env_value(
+        "EMBEDDING_TOKEN_LIMIT", None, int, special_none=True
+    )
+
     ollama_server_infos.LIGHTRAG_NAME = args.simulated_model_name
     ollama_server_infos.LIGHTRAG_TAG = args.simulated_model_tag
 
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 04ce8029..7f838f14 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -807,6 +807,11 @@ def create_app(args):
         send_dimensions=send_dimensions,
     )
 
+    # Set max_token_size if EMBEDDING_TOKEN_LIMIT is provided
+    if args.embedding_token_limit is not None:
+        embedding_func.max_token_size = args.embedding_token_limit
+        logger.info(f"Set embedding max_token_size to {args.embedding_token_limit}")
+
     # Configure rerank function based on args.rerank_bindingparameter
     rerank_model_func = None
     if args.rerank_binding != "null":
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 277eaf85..67ec2308 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -277,6 +277,13 @@ class LightRAG:
     embedding_func: EmbeddingFunc | None = field(default=None)
     """Function for computing text embeddings. Must be set before use."""
 
+    @property
+    def embedding_token_limit(self) -> int | None:
+        """Get the token limit for embedding model from embedding_func."""
+        if self.embedding_func and hasattr(self.embedding_func, "max_token_size"):
+            return self.embedding_func.max_token_size
+        return None
+
     embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 10)))
     """Batch size for embedding computations."""
 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index ae2be49e..858553b1 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -345,6 +345,20 @@ async def _summarize_descriptions(
         llm_response_cache=llm_response_cache,
         cache_type="summary",
     )
+
+    # Check summary token length against embedding limit
+    embedding_token_limit = global_config.get("embedding_token_limit")
+    if embedding_token_limit is not None and summary:
+        tokenizer = global_config["tokenizer"]
+        summary_token_count = len(tokenizer.encode(summary))
+        threshold = int(embedding_token_limit * 0.9)
+
+        if summary_token_count > threshold:
+            logger.warning(
+                f"Summary tokens ({summary_token_count}) exceeds 90% of embedding limit "
+                f"({embedding_token_limit}) for {description_type}: {description_name}"
+            )
+
     return summary
 
 