Add configurable embedding token limit with validation
- Add EMBEDDING_TOKEN_LIMIT env var
- Set max_token_size on embedding func
- Add token limit property to LightRAG
- Validate summary length vs limit
- Log warning when limit exceeded
This commit is contained in:
parent
f5b48587ed
commit
14a6c24ed7
4 changed files with 31 additions and 0 deletions
|
|
@@ -445,6 +445,11 @@ def parse_args() -> argparse.Namespace:
|
|||
"EMBEDDING_BATCH_NUM", DEFAULT_EMBEDDING_BATCH_NUM, int
|
||||
)
|
||||
|
||||
# Embedding token limit configuration
|
||||
args.embedding_token_limit = get_env_value(
|
||||
"EMBEDDING_TOKEN_LIMIT", None, int, special_none=True
|
||||
)
|
||||
|
||||
ollama_server_infos.LIGHTRAG_NAME = args.simulated_model_name
|
||||
ollama_server_infos.LIGHTRAG_TAG = args.simulated_model_tag
|
||||
|
||||
|
|
|
|||
|
|
@@ -807,6 +807,11 @@ def create_app(args):
|
|||
send_dimensions=send_dimensions,
|
||||
)
|
||||
|
||||
# Set max_token_size if EMBEDDING_TOKEN_LIMIT is provided
|
||||
if args.embedding_token_limit is not None:
|
||||
embedding_func.max_token_size = args.embedding_token_limit
|
||||
logger.info(f"Set embedding max_token_size to {args.embedding_token_limit}")
|
||||
|
||||
# Configure rerank function based on the args.rerank_binding parameter
|
||||
rerank_model_func = None
|
||||
if args.rerank_binding != "null":
|
||||
|
|
|
|||
|
|
@@ -277,6 +277,13 @@ class LightRAG:
|
|||
embedding_func: EmbeddingFunc | None = field(default=None)
|
||||
"""Function for computing text embeddings. Must be set before use."""
|
||||
|
||||
@property
|
||||
def embedding_token_limit(self) -> int | None:
|
||||
"""Get the token limit for embedding model from embedding_func."""
|
||||
if self.embedding_func and hasattr(self.embedding_func, "max_token_size"):
|
||||
return self.embedding_func.max_token_size
|
||||
return None
|
||||
|
||||
embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 10)))
|
||||
"""Batch size for embedding computations."""
|
||||
|
||||
|
|
|
|||
|
|
@@ -345,6 +345,20 @@ async def _summarize_descriptions(
|
|||
llm_response_cache=llm_response_cache,
|
||||
cache_type="summary",
|
||||
)
|
||||
|
||||
# Check summary token length against embedding limit
|
||||
embedding_token_limit = global_config.get("embedding_token_limit")
|
||||
if embedding_token_limit is not None and summary:
|
||||
tokenizer = global_config["tokenizer"]
|
||||
summary_token_count = len(tokenizer.encode(summary))
|
||||
threshold = int(embedding_token_limit * 0.9)
|
||||
|
||||
if summary_token_count > threshold:
|
||||
logger.warning(
|
||||
f"Summary tokens ({summary_token_count}) exceeds 90% of embedding limit "
|
||||
f"({embedding_token_limit}) for {description_type}: {description_name}"
|
||||
)
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue