Merge pull request #2068 from danielaskdd/fix-llm-option
refactor: Smart Configuration Caching and Conditional Logging
Commit: 8fd7bca8d7
3 changed files with 158 additions and 81 deletions
```diff
@@ -175,9 +175,8 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai

 ### OpenAI Specific Parameters
-### To mitigate endless output, set the temperature and frequency penalty parameter to a higher value
+### To mitigate endless output, set the temperature to a higher value
 # OPENAI_LLM_TEMPERATURE=0.8
-# OPENAI_FREQUENCY_PENALTY=1.2

 ### OpenRouter Specific Parameters
 # OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
```
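For readers skimming the new sample settings: below is a minimal, hypothetical sketch of how `OPENAI_LLM_TEMPERATURE` and `OPENAI_LLM_EXTRA_BODY` style variables can be folded into request options. The variable names come from the hunk above; the helper `read_openai_llm_env` is illustrative, not LightRAG's actual parser.

```python
# Hypothetical sketch: surface OPENAI_LLM_* env vars as request kwargs.
# Variable names match the sample env file; the helper itself is assumed.
import json
import os


def read_openai_llm_env() -> dict:
    """Collect optional OpenAI tuning knobs from the environment."""
    options = {}
    if "OPENAI_LLM_TEMPERATURE" in os.environ:
        options["temperature"] = float(os.environ["OPENAI_LLM_TEMPERATURE"])
    if "OPENAI_LLM_EXTRA_BODY" in os.environ:
        # OpenRouter-style extra payload, e.g. '{"reasoning": {"enabled": false}}'
        options["extra_body"] = json.loads(os.environ["OPENAI_LLM_EXTRA_BODY"])
    return options


if __name__ == "__main__":
    os.environ["OPENAI_LLM_TEMPERATURE"] = "0.8"
    os.environ["OPENAI_LLM_EXTRA_BODY"] = '{"reasoning": {"enabled": false}}'
    print(read_openai_llm_env())
```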
```diff
@@ -97,11 +97,63 @@ def setup_signal_handlers():
     signal.signal(signal.SIGTERM, signal_handler)  # kill command


+class LLMConfigCache:
+    """Smart LLM and Embedding configuration cache class"""
+
+    def __init__(self, args):
+        self.args = args
+
+        # Initialize configurations based on binding conditions
+        self.openai_llm_options = None
+        self.ollama_llm_options = None
+        self.ollama_embedding_options = None
+
+        # Only initialize and log OpenAI options when using OpenAI-related bindings
+        if args.llm_binding in ["openai", "azure_openai"]:
+            from lightrag.llm.binding_options import OpenAILLMOptions
+
+            self.openai_llm_options = OpenAILLMOptions.options_dict(args)
+            logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")
+
+        # Only initialize and log Ollama LLM options when using Ollama LLM binding
+        if args.llm_binding == "ollama":
+            try:
+                from lightrag.llm.binding_options import OllamaLLMOptions
+
+                self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
+                logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
+            except ImportError:
+                logger.warning(
+                    "OllamaLLMOptions not available, using default configuration"
+                )
+                self.ollama_llm_options = {}
+
+        # Only initialize and log Ollama Embedding options when using Ollama Embedding binding
+        if args.embedding_binding == "ollama":
+            try:
+                from lightrag.llm.binding_options import OllamaEmbeddingOptions
+
+                self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
+                    args
+                )
+                logger.info(
+                    f"Ollama Embedding Options: {self.ollama_embedding_options}"
+                )
+            except ImportError:
+                logger.warning(
+                    "OllamaEmbeddingOptions not available, using default configuration"
+                )
+                self.ollama_embedding_options = {}
+
+
 def create_app(args):
     # Setup logging
     logger.setLevel(args.log_level)
     set_verbose_debug(args.verbose)

+    # Create configuration cache (this will output configuration logs)
+    config_cache = LLMConfigCache(args)
+
     # Verify that bindings are correctly setup
     if args.llm_binding not in [
         "lollms",
```
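The class above parses binding options once at startup and logs only the bindings that are actually in use. Here is a self-contained sketch of that parse-once, log-conditionally pattern, with `SimpleNamespace` standing in for the server's parsed args and the real `options_dict` call mocked out:

```python
# Minimal standalone sketch of the parse-once pattern; DemoConfigCache and
# the hard-coded options dict are illustrative stand-ins, not LightRAG code.
import logging
from types import SimpleNamespace

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("config-cache-demo")


class DemoConfigCache:
    def __init__(self, args):
        self.openai_llm_options = None
        if args.llm_binding in ("openai", "azure_openai"):
            # The real code calls OpenAILLMOptions.options_dict(args) here.
            self.openai_llm_options = {"temperature": 0.8}
            logger.info("OpenAI LLM Options: %s", self.openai_llm_options)


args = SimpleNamespace(llm_binding="ollama", embedding_binding="ollama")
cache = DemoConfigCache(args)
assert cache.openai_llm_options is None  # no OpenAI parsing or logging occurred
```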
```diff
@@ -238,10 +290,85 @@ def create_app(args):
     # Create working directory if it doesn't exist
     Path(args.working_dir).mkdir(parents=True, exist_ok=True)

+    def create_optimized_openai_llm_func(
+        config_cache: LLMConfigCache, args, llm_timeout: int
+    ):
+        """Create optimized OpenAI LLM function with pre-processed configuration"""
+
+        async def optimized_openai_alike_model_complete(
+            prompt,
+            system_prompt=None,
+            history_messages=None,
+            keyword_extraction=False,
+            **kwargs,
+        ) -> str:
+            from lightrag.llm.openai import openai_complete_if_cache
+
+            keyword_extraction = kwargs.pop("keyword_extraction", None)
+            if keyword_extraction:
+                kwargs["response_format"] = GPTKeywordExtractionFormat
+            if history_messages is None:
+                history_messages = []
+
+            # Use pre-processed configuration to avoid repeated parsing
+            kwargs["timeout"] = llm_timeout
+            if config_cache.openai_llm_options:
+                kwargs.update(config_cache.openai_llm_options)
+
+            return await openai_complete_if_cache(
+                args.llm_model,
+                prompt,
+                system_prompt=system_prompt,
+                history_messages=history_messages,
+                base_url=args.llm_binding_host,
+                api_key=args.llm_binding_api_key,
+                **kwargs,
+            )
+
+        return optimized_openai_alike_model_complete
+
+    def create_optimized_azure_openai_llm_func(
+        config_cache: LLMConfigCache, args, llm_timeout: int
+    ):
+        """Create optimized Azure OpenAI LLM function with pre-processed configuration"""
+
+        async def optimized_azure_openai_model_complete(
+            prompt,
+            system_prompt=None,
+            history_messages=None,
+            keyword_extraction=False,
+            **kwargs,
+        ) -> str:
+            from lightrag.llm.azure_openai import azure_openai_complete_if_cache
+
+            keyword_extraction = kwargs.pop("keyword_extraction", None)
+            if keyword_extraction:
+                kwargs["response_format"] = GPTKeywordExtractionFormat
+            if history_messages is None:
+                history_messages = []
+
+            # Use pre-processed configuration to avoid repeated parsing
+            kwargs["timeout"] = llm_timeout
+            if config_cache.openai_llm_options:
+                kwargs.update(config_cache.openai_llm_options)
+
+            return await azure_openai_complete_if_cache(
+                args.llm_model,
+                prompt,
+                system_prompt=system_prompt,
+                history_messages=history_messages,
+                base_url=args.llm_binding_host,
+                api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
+                api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
+                **kwargs,
+            )
+
+        return optimized_azure_openai_model_complete
+
     def create_llm_model_func(binding: str):
         """
         Create LLM model function based on binding type.
         Uses lazy import to avoid unnecessary dependencies.
+        Uses optimized functions for OpenAI bindings and lazy import for others.
         """
         try:
             if binding == "lollms":
```
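Both new factories rely on the same closure trick: the options are resolved when the factory runs, and each request only pays for a dict update. A reduced sketch of that shape, with illustrative names:

```python
# Sketch of the factory/closure pattern used above: the expensive option
# lookup happens once in the factory; each call only does a dict update.
import asyncio


def make_complete(cached_options: dict, timeout: int):
    async def complete(prompt: str, **kwargs) -> str:
        kwargs["timeout"] = timeout
        if cached_options:
            kwargs.update(cached_options)  # no re-parsing per request
        return f"echo({prompt}, {sorted(kwargs)})"

    return complete


complete = make_complete({"temperature": 0.8}, timeout=180)
print(asyncio.run(complete("hello")))
```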
```diff
@@ -255,9 +382,13 @@ def create_app(args):
             elif binding == "aws_bedrock":
                 return bedrock_model_complete  # Already defined locally
             elif binding == "azure_openai":
-                return azure_openai_model_complete  # Already defined locally
+                # Use optimized function with pre-processed configuration
+                return create_optimized_azure_openai_llm_func(
+                    config_cache, args, llm_timeout
+                )
             else:  # openai and compatible
-                return openai_alike_model_complete  # Already defined locally
+                # Use optimized function with pre-processed configuration
+                return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
         except ImportError as e:
             raise Exception(f"Failed to import {binding} LLM binding: {e}")
```
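The dispatch above boils down to mapping a binding name onto a factory, with unknown names falling through to the OpenAI-compatible path. A toy version of that shape (the registry and names are illustrative, not the server's real API):

```python
# Toy binding dispatch mirroring the if/elif chain above.
FACTORIES = {
    "azure_openai": lambda: "azure-complete-fn",
    "openai": lambda: "openai-complete-fn",
}


def pick_llm_func(binding: str):
    # Unknown bindings fall through to the OpenAI-compatible path,
    # matching the `else:  # openai and compatible` branch above.
    return FACTORIES.get(binding, FACTORIES["openai"])()


print(pick_llm_func("azure_openai"))  # -> azure-complete-fn
print(pick_llm_func("groq"))          # -> openai-complete-fn (fallback)
```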
```diff
@@ -280,15 +411,15 @@ def create_app(args):
                 raise Exception(f"Failed to import {binding} options: {e}")
             return {}

-    def create_embedding_function_with_lazy_import(
-        binding, model, host, api_key, dimensions, args
+    def create_optimized_embedding_function(
+        config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
     ):
         """
-        Create embedding function with lazy imports for all bindings.
-        Replaces the current create_embedding_function with full lazy import support.
+        Create optimized embedding function with pre-processed configuration for applicable bindings.
+        Uses lazy imports for all bindings and avoids repeated configuration parsing.
         """

-        async def embedding_function(texts):
+        async def optimized_embedding_function(texts):
             try:
                 if binding == "lollms":
                     from lightrag.llm.lollms import lollms_embed
```
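The renamed factory keeps the same structure: capture configuration once, return an async callable that only performs the embedding call. A reduced, self-contained sketch with fake vectors in place of a real binding:

```python
# Shape of the embedding wrapper built above, as an illustrative sketch:
# the factory captures its inputs; the inner coroutine does the work.
import asyncio


def make_embedder(cached_options, model: str):
    async def embed(texts: list[str]) -> list[list[float]]:
        opts = cached_options if cached_options is not None else {}
        # A real binding would call a service here; we fabricate vectors.
        dim = opts.get("dim", 4)
        return [[float(len(t))] * dim for t in texts]

    return embed


embed = make_embedder({"dim": 3}, model="bge-m3")
print(asyncio.run(embed(["hello", "world"])))
```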
```diff
@@ -297,10 +428,17 @@ def create_app(args):
                         texts, embed_model=model, host=host, api_key=api_key
                     )
                 elif binding == "ollama":
-                    from lightrag.llm.binding_options import OllamaEmbeddingOptions
                     from lightrag.llm.ollama import ollama_embed

-                    ollama_options = OllamaEmbeddingOptions.options_dict(args)
+                    # Use pre-processed configuration if available, otherwise fallback to dynamic parsing
+                    if config_cache.ollama_embedding_options is not None:
+                        ollama_options = config_cache.ollama_embedding_options
+                    else:
+                        # Fallback for cases where config cache wasn't initialized properly
+                        from lightrag.llm.binding_options import OllamaEmbeddingOptions
+
+                        ollama_options = OllamaEmbeddingOptions.options_dict(args)

                     return await ollama_embed(
                         texts,
                         embed_model=model,
```
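The cache-or-fallback decision in the Ollama branch, isolated into a few lines (function names are illustrative):

```python
# Prefer the value computed at startup; only re-derive it if the cache
# was never filled, matching the fallback branch above.
def resolve_options(cached, parse_fn):
    if cached is not None:
        return cached  # fast path: pre-processed at startup
    return parse_fn()  # fallback: dynamic parsing, as before


print(resolve_options({"num_ctx": 8192}, lambda: {}))    # -> cached dict
print(resolve_options(None, lambda: {"num_ctx": 4096}))  # -> freshly parsed
```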
```diff
@@ -331,78 +469,13 @@ def create_app(args):
             except ImportError as e:
                 raise Exception(f"Failed to import {binding} embedding: {e}")

-        return embedding_function
+        return optimized_embedding_function

     llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
     embedding_timeout = get_env_value(
         "EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
     )

-    async def openai_alike_model_complete(
-        prompt,
-        system_prompt=None,
-        history_messages=None,
-        keyword_extraction=False,
-        **kwargs,
-    ) -> str:
-        # Lazy import
-        from lightrag.llm.openai import openai_complete_if_cache
-        from lightrag.llm.binding_options import OpenAILLMOptions
-
-        keyword_extraction = kwargs.pop("keyword_extraction", None)
-        if keyword_extraction:
-            kwargs["response_format"] = GPTKeywordExtractionFormat
-        if history_messages is None:
-            history_messages = []
-
-        # Use OpenAI LLM options if available
-        openai_options = OpenAILLMOptions.options_dict(args)
-        kwargs["timeout"] = llm_timeout
-        kwargs.update(openai_options)
-
-        return await openai_complete_if_cache(
-            args.llm_model,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            base_url=args.llm_binding_host,
-            api_key=args.llm_binding_api_key,
-            **kwargs,
-        )
-
-    async def azure_openai_model_complete(
-        prompt,
-        system_prompt=None,
-        history_messages=None,
-        keyword_extraction=False,
-        **kwargs,
-    ) -> str:
-        # Lazy import
-        from lightrag.llm.azure_openai import azure_openai_complete_if_cache
-        from lightrag.llm.binding_options import OpenAILLMOptions
-
-        keyword_extraction = kwargs.pop("keyword_extraction", None)
-        if keyword_extraction:
-            kwargs["response_format"] = GPTKeywordExtractionFormat
-        if history_messages is None:
-            history_messages = []
-
-        # Use OpenAI LLM options
-        openai_options = OpenAILLMOptions.options_dict(args)
-        kwargs["timeout"] = llm_timeout
-        kwargs.update(openai_options)
-
-        return await azure_openai_complete_if_cache(
-            args.llm_model,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            base_url=args.llm_binding_host,
-            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
-            api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
-            **kwargs,
-        )
-
     async def bedrock_model_complete(
         prompt,
         system_prompt=None,
```
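The two removed completion functions each re-ran `OpenAILLMOptions.options_dict(args)` on every call; the optimized versions read the dict built once in `LLMConfigCache`. A rough micro-benchmark sketch of that trade, with `parse_options` standing in for the removed per-call work (timings will vary by machine):

```python
# Illustrative comparison: rebuilding the options dict on every request
# versus reusing a dict parsed once at startup.
import timeit


def parse_options():
    # Stand-in for walking args/env and building a dict each time.
    return {"temperature": 0.8, "max_tokens": 2048}


CACHED = parse_options()

per_call = timeit.timeit(lambda: parse_options().copy(), number=100_000)
cached = timeit.timeit(lambda: CACHED.copy(), number=100_000)
print(f"per-call parse: {per_call:.3f}s, cached reuse: {cached:.3f}s")
```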
```diff
@@ -430,16 +503,17 @@ def create_app(args):
             **kwargs,
         )

-    # Create embedding function with lazy imports
+    # Create embedding function with optimized configuration
     embedding_func = EmbeddingFunc(
         embedding_dim=args.embedding_dim,
-        func=create_embedding_function_with_lazy_import(
+        func=create_optimized_embedding_function(
+            config_cache=config_cache,
             binding=args.embedding_binding,
             model=args.embedding_model,
             host=args.embedding_binding_host,
             api_key=args.embedding_binding_api_key,
             dimensions=args.embedding_dim,
-            args=args,  # Pass args object for dynamic option generation
+            args=args,  # Pass args object for fallback option generation
         ),
     )
```
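`EmbeddingFunc` pairs the embedding dimension with the async callable, so storage layers can size vectors without invoking the function first. A simplified stand-in (not the real `EmbeddingFunc` class) to show the wiring:

```python
# Simplified, assumed stand-in for LightRAG's EmbeddingFunc wrapper.
import asyncio
from dataclasses import dataclass
from typing import Awaitable, Callable


@dataclass
class DemoEmbeddingFunc:
    embedding_dim: int
    func: Callable[[list[str]], Awaitable[list[list[float]]]]

    async def __call__(self, texts):
        return await self.func(texts)


async def fake_embed(texts):
    return [[0.0] * 8 for _ in texts]


ef = DemoEmbeddingFunc(embedding_dim=8, func=fake_embed)
print(len(asyncio.run(ef(["doc"]))[0]))  # -> 8, matching embedding_dim
```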
```diff
@@ -870,6 +870,10 @@ async def _process_extraction_result(
         record = record.replace("<|>>", "<|>")
         # fix <<|> with <|>
         record = record.replace("<<|>", "<|>")
+        # fix <.|> with <|>
+        record = record.replace("<.|>", "<|>")
+        # fix <|.> with <|>
+        record = record.replace("<|.>", "<|>")

         record_attributes = split_string_by_multi_markers(record, [tuple_delimiter])
```
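These replaces normalize malformed variants of the tuple delimiter that LLMs sometimes emit during extraction. The same repairs, runnable in isolation:

```python
# The delimiter repairs from the hunk above, as a standalone helper.
def normalize_record(record: str) -> str:
    for bad in ("<|>>", "<<|>", "<.|>", "<|.>"):
        record = record.replace(bad, "<|>")
    return record


assert normalize_record('("entity"<.|>"Tokyo"<|.>"city")') == '("entity"<|>"Tokyo"<|>"city")'
print(normalize_record('("entity"<<|>"Tokyo"<|>>"city")'))
```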