diff --git a/env.example b/env.example
index 8d986b16..534bd22a 100644
--- a/env.example
+++ b/env.example
@@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256
 
 ### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
 
@@ -119,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 
+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
 
@@ -142,8 +147,9 @@ SUMMARY_LANGUAGE=English
 ### FIFO: First in first out
 ### KEEP: Keep oldest (less merge action and faster)
 # SOURCE_IDS_LIMIT_METHOD=FIFO
-### Maximum number of file paths stored in entity/relation file_path field
-# MAX_FILE_PATHS=30
+
+# Maximum number of file paths stored in entity/relation file_path field (For display only, does not affect query performance)
+# MAX_FILE_PATHS=100
 
 ### maximum number of related chunks per source entity or relation
 ### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
@@ -162,10 +168,11 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10
 
-###########################################################
+###########################################################################
 ### LLM Configuration
-### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
-###########################################################
+### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+###########################################################################
 
 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180
@@ -184,6 +191,15 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai
 
+### Gemini example
+# LLM_BINDING=gemini
+# LLM_MODEL=gemini-flash-latest
+# LLM_BINDING_API_KEY=your_gemini_api_key
+# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
+GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
+# GEMINI_LLM_TEMPERATURE=0.7
+
 ### OpenAI Compatible API Specific Parameters
 ### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
 # OPENAI_LLM_TEMPERATURE=0.9
@@ -207,6 +223,7 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
 
 ### use the following command to see all support options for Ollama LLM
+### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost in LLM_BINDING_HOST
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
 ### OLLAMA_LLM_NUM_CTX must be provided, and should at least larger than MAX_TOTAL_TOKENS + 2000
@@ -219,16 +236,25 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0
 
-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+#######################################################################################
 # EMBEDDING_TIMEOUT=30
+
+### Control whether to send embedding_dim parameter to embedding API
+### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
+### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
+### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
+# EMBEDDING_SEND_DIM=false
+
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
-# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
+# If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
 
 ### OpenAI compatible (VoyageAI embedding openai compatible)
@@ -390,21 +416,30 @@ MEMGRAPH_DATABASE=memgraph
 ### Evaluation Configuration
 ############################
 ### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
 ### Default uses OpenAI models for evaluation
-# EVAL_LLM_MODEL=gpt-4.1
-# EVAL_EMBEDDING_MODEL=text-embedding-3-large
-### API key for evaluation (fallback to OPENAI_API_KEY if not set)
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
 # EVAL_LLM_BINDING_API_KEY=your_api_key
-### Custom endpoint for evaluation models (optional, for OpenAI-compatible services)
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
 # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
-### Evaluation concurrency and rate limiting
-### Number of concurrent test case evaluations (default: 1 for serial evaluation)
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
 ### Lower values reduce API rate limit issues but increase evaluation time
-# EVAL_MAX_CONCURRENT=3
+# EVAL_MAX_CONCURRENT=2
 
 ### TOP_K query parameter of LightRAG (default: 10)
 ### Number of entities or relations retrieved from KG
 # EVAL_QUERY_TOP_K=10
 ### LLM request retry and timeout settings for evaluation
 # EVAL_LLM_MAX_RETRIES=5
-# EVAL_LLM_TIMEOUT=120
+# EVAL_LLM_TIMEOUT=180
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 70e17bb6..f36d88df 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -15,7 +15,6 @@ import logging.config
 import sys
 import uvicorn
 import pipmaster as pm
-import inspect
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import RedirectResponse
 from pathlib import Path
@@ -512,7 +511,9 @@ def create_app(args):
 
         return optimized_azure_openai_model_complete
 
-    def create_optimized_gemini_llm_func(config_cache: LLMConfigCache, args):
+    def create_optimized_gemini_llm_func(
+        config_cache: LLMConfigCache, args, llm_timeout: int
+    ):
         """Create optimized Gemini LLM function with cached configuration"""
 
         async def optimized_gemini_model_complete(
@@ -527,6 +528,8 @@ def create_app(args):
             if history_messages is None:
                 history_messages = []
 
+            # Use pre-processed configuration to avoid repeated parsing
+            kwargs["timeout"] = llm_timeout
             if (
                 config_cache.gemini_llm_options is not None
                 and "generation_config" not in kwargs
@@ -568,7 +571,7 @@ def create_app(args):
                 config_cache, args, llm_timeout
             )
         elif binding == "gemini":
-            return create_optimized_gemini_llm_func(config_cache, args)
+            return create_optimized_gemini_llm_func(config_cache, args, llm_timeout)
         else:  # openai and compatible
             # Use optimized function with pre-processed configuration
             return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
@@ -595,7 +598,7 @@ def create_app(args):
         return {}
 
     def create_optimized_embedding_function(
-        config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
+        config_cache: LLMConfigCache, binding, model, host, api_key, args
    ):
         """
         Create optimized embedding function with pre-processed configuration for applicable bindings.
@@ -640,9 +643,7 @@ def create_app(args):
             elif binding == "jina":
                 from lightrag.llm.jina import jina_embed
 
-                return await jina_embed(
-                    texts, dimensions=dimensions, base_url=host, api_key=api_key
-                )
+                return await jina_embed(texts, base_url=host, api_key=api_key)
             else:  # openai and compatible
                 from lightrag.llm.openai import openai_embed
 
@@ -687,17 +688,49 @@ def create_app(args):
     )
 
     # Create embedding function with optimized configuration
+    import inspect
+
+    # Create the optimized embedding function
+    optimized_embedding_func = create_optimized_embedding_function(
+        config_cache=config_cache,
+        binding=args.embedding_binding,
+        model=args.embedding_model,
+        host=args.embedding_binding_host,
+        api_key=args.embedding_binding_api_key,
+        args=args,  # Pass args object for fallback option generation
+    )
+
+    # Check environment variable for sending dimensions
+    embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
+
+    # Check if the function signature has embedding_dim parameter
+    # Note: Since optimized_embedding_func is an async function, inspect its signature
+    sig = inspect.signature(optimized_embedding_func)
+    has_embedding_dim_param = "embedding_dim" in sig.parameters
+
+    # Determine send_dimensions value based on binding type
+    # Jina REQUIRES dimension parameter (forced to True)
+    # OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable
+    if args.embedding_binding == "jina":
+        # Jina API requires dimension parameter - always send it
+        send_dimensions = has_embedding_dim_param
+        dimension_control = "forced (Jina API requirement)"
+    else:
+        # For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
+        send_dimensions = embedding_send_dim and has_embedding_dim_param
+        dimension_control = f"env_var={embedding_send_dim}"
+
+    logger.info(
+        f"Embedding configuration: send_dimensions={send_dimensions} "
+        f"({dimension_control}, has_param={has_embedding_dim_param}, "
+        f"binding={args.embedding_binding})"
+    )
+
+    # Create EmbeddingFunc with send_dimensions attribute
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        func=create_optimized_embedding_function(
-            config_cache=config_cache,
-            binding=args.embedding_binding,
-            model=args.embedding_model,
-            host=args.embedding_binding_host,
-            api_key=args.embedding_binding_api_key,
-            dimensions=args.embedding_dim,
-            args=args,  # Pass args object for fallback option generation
-        ),
+        func=optimized_embedding_func,
+        send_dimensions=send_dimensions,
     )
 
     # Configure rerank function based on args.rerank_bindingparameter
diff --git a/lightrag/utils.py b/lightrag/utils.py
index bfa3cac4..460ede3c 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -353,8 +353,29 @@ class EmbeddingFunc:
     embedding_dim: int
     func: callable
     max_token_size: int | None = None  # deprecated keep it for compatible only
+    send_dimensions: bool = (
+        False  # Control whether to send embedding_dim to the function
+    )
 
     async def __call__(self, *args, **kwargs) -> np.ndarray:
+        # Only inject embedding_dim when send_dimensions is True
+        if self.send_dimensions:
+            # Check if user provided embedding_dim parameter
+            if "embedding_dim" in kwargs:
+                user_provided_dim = kwargs["embedding_dim"]
+                # If user's value differs from class attribute, output warning
+                if (
+                    user_provided_dim is not None
+                    and user_provided_dim != self.embedding_dim
+                ):
+                    logger.warning(
+                        f"Ignoring user-provided embedding_dim={user_provided_dim}, "
+                        f"using declared embedding_dim={self.embedding_dim} from decorator"
+                    )
+
+            # Inject embedding_dim from decorator
+            kwargs["embedding_dim"] = self.embedding_dim
+
         return await self.func(*args, **kwargs)
 
@@ -1795,7 +1816,7 @@ def normalize_extracted_info(name: str, remove_inner_quotes=False) -> str:
     - Filter out short numeric-only text (length < 3 and only digits/dots)
     - remove_inner_quotes = True remove Chinese quotes
-        remove English queotes in and around chinese
+        remove English quotes in and around chinese
 
     Convert non-breaking spaces to regular spaces
     Convert narrow non-breaking spaces after non-digits to regular spaces
@@ -2551,6 +2572,52 @@ def apply_source_ids_limit(
     return truncated
 
 
+def compute_incremental_chunk_ids(
+    existing_full_chunk_ids: list[str],
+    old_chunk_ids: list[str],
+    new_chunk_ids: list[str],
+) -> list[str]:
+    """
+    Compute incrementally updated chunk IDs based on changes.
+
+    This function applies delta changes (additions and removals) to an existing
+    list of chunk IDs while maintaining order and ensuring deduplication.
+    Delta additions from new_chunk_ids are placed at the end.
+
+    Args:
+        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
+        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
+        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)
+
+    Returns:
+        Updated list of chunk IDs with deduplication
+
+    Example:
+        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
+        >>> old = ['chunk-1', 'chunk-2']
+        >>> new = ['chunk-2', 'chunk-4']
+        >>> compute_incremental_chunk_ids(existing, old, new)
+        ['chunk-2', 'chunk-3', 'chunk-4']
+    """
+    # Calculate changes
+    chunks_to_remove = set(old_chunk_ids) - set(new_chunk_ids)
+    chunks_to_add = set(new_chunk_ids) - set(old_chunk_ids)
+
+    # Apply changes to full chunk_ids
+    # Step 1: Remove chunks that are no longer needed
+    updated_chunk_ids = [
+        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
+    ]
+
+    # Step 2: Add new chunks (preserving order from new_chunk_ids)
+    # Note: 'cid not in updated_chunk_ids' check ensures deduplication
+    for cid in new_chunk_ids:
+        if cid in chunks_to_add and cid not in updated_chunk_ids:
+            updated_chunk_ids.append(cid)
+
+    return updated_chunk_ids
+
+
 def subtract_source_ids(
     source_ids: Iterable[str],
     ids_to_remove: Collection[str],
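
For reference, a minimal sketch of how the new EMBEDDING_SEND_DIM / send_dimensions plumbing behaves once this patch is applied. The fake_embed callable and the 768 fallback dimension below are illustrative stand-ins, not part of the patch; it assumes the patched lightrag.utils is importable.

import asyncio
import numpy as np
from lightrag.utils import EmbeddingFunc  # class extended by this patch

async def fake_embed(texts, embedding_dim=None, **kwargs):
    # Stand-in embedding backend; real bindings live under lightrag/llm/*.
    dim = embedding_dim or 768
    return np.zeros((len(texts), dim))

# With send_dimensions=True, __call__ injects embedding_dim=1024 into kwargs
# (overriding any caller-supplied value, with a warning); with False it does not.
func = EmbeddingFunc(embedding_dim=1024, func=fake_embed, send_dimensions=True)
print(asyncio.run(func(["hello world"])).shape)  # (1, 1024)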
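
And a quick usage sketch for the new compute_incremental_chunk_ids helper, matching the docstring example above (again assuming the patched lightrag.utils is importable):

from lightrag.utils import compute_incremental_chunk_ids

existing = ["chunk-1", "chunk-2", "chunk-3"]  # full list currently stored
old = ["chunk-1", "chunk-2"]                  # chunk IDs from the replaced source_id
new = ["chunk-2", "chunk-4"]                  # chunk IDs from the updated source_id

# chunk-1 is removed, chunk-2 and chunk-3 keep their stored order,
# and chunk-4 is appended as a delta addition.
print(compute_incremental_chunk_ids(existing, old, new))
# ['chunk-2', 'chunk-3', 'chunk-4']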