cherry-pick c14f25b7

2025-12-04 19:19:01 +08:00 · 2025-12-04 19:19:01 +08:00 · 44a05f7a25
commit 44a05f7a25
parent 3b986f046f
3 changed files with 168 additions and 33 deletions
--- a/env.example
+++ b/env.example
@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256

 ### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*

@ -119,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English

+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'

@ -142,8 +147,9 @@ SUMMARY_LANGUAGE=English
 ###    FIFO: First in first out
 ###    KEEP: Keep oldest (less merge action and faster)
 # SOURCE_IDS_LIMIT_METHOD=FIFO
-### Maximum number of file paths stored in entity/relation file_path field
-# MAX_FILE_PATHS=30
+
+# Maximum number of file paths stored in entity/relation file_path field (For displayed only, does not affect query performance)
+# MAX_FILE_PATHS=100

 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
@ -162,10 +168,11 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10

-###########################################################
+###########################################################################
 ### LLM Configuration
-### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
-###########################################################
+### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+###########################################################################
 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180

@ -184,6 +191,15 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai

+### Gemini example
+# LLM_BINDING=gemini
+# LLM_MODEL=gemini-flash-latest
+# LLM_BINDING_API_KEY=your_gemini_api_key
+# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
+GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
+# GEMINI_LLM_TEMPERATURE=0.7
+
 ### OpenAI Compatible API Specific Parameters
 ### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
 # OPENAI_LLM_TEMPERATURE=0.9
@ -207,6 +223,7 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

 ### use the following command to see all support options for Ollama LLM
+### If LightRAG deployed in Docker uses host.docker.internal instead of localhost in LLM_BINDING_HOST
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
 ### OLLAMA_LLM_NUM_CTX must be provided, and should at least larger than MAX_TOTAL_TOKENS + 2000
@ -219,16 +236,25 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0

-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+#######################################################################################
 # EMBEDDING_TIMEOUT=30
+
+### Control whether to send embedding_dim parameter to embedding API
+### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
+### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
+### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
+# EMBEDDING_SEND_DIM=false
+
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
-# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
+# If LightRAG deployed in Docker uses host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434

 ### OpenAI compatible (VoyageAI embedding openai compatible)
@ -390,21 +416,30 @@ MEMGRAPH_DATABASE=memgraph
 ### Evaluation Configuration
 ############################
 ### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
 ### Default uses OpenAI models for evaluation
-# EVAL_LLM_MODEL=gpt-4.1
-# EVAL_EMBEDDING_MODEL=text-embedding-3-large
-### API key for evaluation (fallback to OPENAI_API_KEY if not set)
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
 # EVAL_LLM_BINDING_API_KEY=your_api_key
-### Custom endpoint for evaluation models (optional, for OpenAI-compatible services)
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
 # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1

-### Evaluation concurrency and rate limiting
-### Number of concurrent test case evaluations (default: 1 for serial evaluation)
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
 ### Lower values reduce API rate limit issues but increase evaluation time
-# EVAL_MAX_CONCURRENT=3
+# EVAL_MAX_CONCURRENT=2
 ### TOP_K query parameter of LightRAG (default: 10)
 ### Number of entities or relations retrieved from KG
 # EVAL_QUERY_TOP_K=10
 ### LLM request retry and timeout settings for evaluation
 # EVAL_LLM_MAX_RETRIES=5
-# EVAL_LLM_TIMEOUT=120
+# EVAL_LLM_TIMEOUT=180
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@ -15,7 +15,6 @@ import logging.config
 import sys
 import uvicorn
 import pipmaster as pm
-import inspect
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import RedirectResponse
 from pathlib import Path
@ -512,7 +511,9 @@ def create_app(args):

        return optimized_azure_openai_model_complete

-    def create_optimized_gemini_llm_func(config_cache: LLMConfigCache, args):
+    def create_optimized_gemini_llm_func(
+        config_cache: LLMConfigCache, args, llm_timeout: int
+    ):
        """Create optimized Gemini LLM function with cached configuration"""

        async def optimized_gemini_model_complete(
@ -527,6 +528,8 @@ def create_app(args):
            if history_messages is None:
                history_messages = []

+            # Use pre-processed configuration to avoid repeated parsing
+            kwargs["timeout"] = llm_timeout
            if (
                config_cache.gemini_llm_options is not None
                and "generation_config" not in kwargs
@ -568,7 +571,7 @@ def create_app(args):
                    config_cache, args, llm_timeout
                )
            elif binding == "gemini":
-                return create_optimized_gemini_llm_func(config_cache, args)
+                return create_optimized_gemini_llm_func(config_cache, args, llm_timeout)
            else:  # openai and compatible
                # Use optimized function with pre-processed configuration
                return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
@ -595,7 +598,7 @@ def create_app(args):
        return {}

    def create_optimized_embedding_function(
-        config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
+        config_cache: LLMConfigCache, binding, model, host, api_key, args
    ):
        """
        Create optimized embedding function with pre-processed configuration for applicable bindings.
@ -640,9 +643,7 @@ def create_app(args):
                elif binding == "jina":
                    from lightrag.llm.jina import jina_embed

-                    return await jina_embed(
-                        texts, dimensions=dimensions, base_url=host, api_key=api_key
-                    )
+                    return await jina_embed(texts, base_url=host, api_key=api_key)
                else:  # openai and compatible
                    from lightrag.llm.openai import openai_embed

@ -687,17 +688,49 @@ def create_app(args):
        )

    # Create embedding function with optimized configuration
+    import inspect
+
+    # Create the optimized embedding function
+    optimized_embedding_func = create_optimized_embedding_function(
+        config_cache=config_cache,
+        binding=args.embedding_binding,
+        model=args.embedding_model,
+        host=args.embedding_binding_host,
+        api_key=args.embedding_binding_api_key,
+        args=args,  # Pass args object for fallback option generation
+    )
+
+    # Check environment variable for sending dimensions
+    embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
+
+    # Check if the function signature has embedding_dim parameter
+    # Note: Since optimized_embedding_func is an async function, inspect its signature
+    sig = inspect.signature(optimized_embedding_func)
+    has_embedding_dim_param = "embedding_dim" in sig.parameters
+
+    # Determine send_dimensions value based on binding type
+    # Jina REQUIRES dimension parameter (forced to True)
+    # OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable
+    if args.embedding_binding == "jina":
+        # Jina API requires dimension parameter - always send it
+        send_dimensions = has_embedding_dim_param
+        dimension_control = "forced (Jina API requirement)"
+    else:
+        # For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
+        send_dimensions = embedding_send_dim and has_embedding_dim_param
+        dimension_control = f"env_var={embedding_send_dim}"
+
+    logger.info(
+        f"Embedding configuration: send_dimensions={send_dimensions} "
+        f"({dimension_control}, has_param={has_embedding_dim_param}, "
+        f"binding={args.embedding_binding})"
+    )
+
+    # Create EmbeddingFunc with send_dimensions attribute
    embedding_func = EmbeddingFunc(
        embedding_dim=args.embedding_dim,
-        func=create_optimized_embedding_function(
-            config_cache=config_cache,
-            binding=args.embedding_binding,
-            model=args.embedding_model,
-            host=args.embedding_binding_host,
-            api_key=args.embedding_binding_api_key,
-            dimensions=args.embedding_dim,
-            args=args,  # Pass args object for fallback option generation
-        ),
+        func=optimized_embedding_func,
+        send_dimensions=send_dimensions,
    )

    # Configure rerank function based on args.rerank_bindingparameter
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@ -353,8 +353,29 @@ class EmbeddingFunc:
    embedding_dim: int
    func: callable
    max_token_size: int | None = None  # deprecated keep it for compatible only
+    send_dimensions: bool = (
+        False  # Control whether to send embedding_dim to the function
+    )

    async def __call__(self, *args, **kwargs) -> np.ndarray:
+        # Only inject embedding_dim when send_dimensions is True
+        if self.send_dimensions:
+            # Check if user provided embedding_dim parameter
+            if "embedding_dim" in kwargs:
+                user_provided_dim = kwargs["embedding_dim"]
+                # If user's value differs from class attribute, output warning
+                if (
+                    user_provided_dim is not None
+                    and user_provided_dim != self.embedding_dim
+                ):
+                    logger.warning(
+                        f"Ignoring user-provided embedding_dim={user_provided_dim}, "
+                        f"using declared embedding_dim={self.embedding_dim} from decorator"
+                    )
+
+            # Inject embedding_dim from decorator
+            kwargs["embedding_dim"] = self.embedding_dim
+
        return await self.func(*args, **kwargs)


@ -1795,7 +1816,7 @@ def normalize_extracted_info(name: str, remove_inner_quotes=False) -> str:
    - Filter out short numeric-only text (length < 3 and only digits/dots)
    - remove_inner_quotes = True
        remove Chinese quotes
-        remove English queotes in and around chinese
+        remove English quotes in and around chinese
        Convert non-breaking spaces to regular spaces
        Convert narrow non-breaking spaces after non-digits to regular spaces

@ -2551,6 +2572,52 @@ def apply_source_ids_limit(
    return truncated


+def compute_incremental_chunk_ids(
+    existing_full_chunk_ids: list[str],
+    old_chunk_ids: list[str],
+    new_chunk_ids: list[str],
+) -> list[str]:
+    """
+    Compute incrementally updated chunk IDs based on changes.
+
+    This function applies delta changes (additions and removals) to an existing
+    list of chunk IDs while maintaining order and ensuring deduplication.
+    Delta additions from new_chunk_ids are placed at the end.
+
+    Args:
+        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
+        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
+        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)
+
+    Returns:
+        Updated list of chunk IDs with deduplication
+
+    Example:
+        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
+        >>> old = ['chunk-1', 'chunk-2']
+        >>> new = ['chunk-2', 'chunk-4']
+        >>> compute_incremental_chunk_ids(existing, old, new)
+        ['chunk-3', 'chunk-2', 'chunk-4']
+    """
+    # Calculate changes
+    chunks_to_remove = set(old_chunk_ids) - set(new_chunk_ids)
+    chunks_to_add = set(new_chunk_ids) - set(old_chunk_ids)
+
+    # Apply changes to full chunk_ids
+    # Step 1: Remove chunks that are no longer needed
+    updated_chunk_ids = [
+        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
+    ]
+
+    # Step 2: Add new chunks (preserving order from new_chunk_ids)
+    # Note: 'cid not in updated_chunk_ids' check ensures deduplication
+    for cid in new_chunk_ids:
+        if cid in chunks_to_add and cid not in updated_chunk_ids:
+            updated_chunk_ids.append(cid)
+
+    return updated_chunk_ids
+
+
 def subtract_source_ids(
    source_ids: Iterable[str],
    ids_to_remove: Collection[str],