Optimize LLM/embedding config caching to reduce repeated parsing overhead

• Add LLMConfigCache class for smart caching
• Pre-process OpenAI/Ollama configurations
• Create optimized function factories
• Reduce redundant option parsing calls
This commit is contained in:
yangdx 2025-09-05 16:36:08 +08:00
parent dc14623b42
commit a1df76a4ea

View file

@ -97,11 +97,63 @@ def setup_signal_handlers():
signal.signal(signal.SIGTERM, signal_handler)  # kill command
class LLMConfigCache:
    """Smart LLM and Embedding configuration cache class.

    Parses binding options once at construction time (logging the result) so
    request handlers do not re-parse them on every call.  Attributes stay
    ``None`` for bindings that are not selected in *args*.
    """

    def __init__(self, args):
        # Keep a reference for consumers that need raw argument access later.
        self.args = args

        # ``None`` marks "binding not in use / not initialized".
        self.openai_llm_options = None
        self.ollama_llm_options = None
        self.ollama_embedding_options = None

        llm_binding = args.llm_binding

        # OpenAI and Azure OpenAI share one option set; parse and log it once.
        if llm_binding in ("openai", "azure_openai"):
            from lightrag.llm.binding_options import OpenAILLMOptions

            self.openai_llm_options = OpenAILLMOptions.options_dict(args)
            logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")

        # Ollama LLM options are optional: fall back to {} if unavailable.
        if llm_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaLLMOptions

                self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
                logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
            except ImportError:
                logger.warning(
                    "OllamaLLMOptions not available, using default configuration"
                )
                self.ollama_llm_options = {}

        # Same optional treatment for the Ollama embedding binding.
        if args.embedding_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaEmbeddingOptions

                self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
                    args
                )
                logger.info(
                    f"Ollama Embedding Options: {self.ollama_embedding_options}"
                )
            except ImportError:
                logger.warning(
                    "OllamaEmbeddingOptions not available, using default configuration"
                )
                self.ollama_embedding_options = {}
def create_app(args):
    # Setup logging
    logger.setLevel(args.log_level)
    set_verbose_debug(args.verbose)
# Create configuration cache (this will output configuration logs)
config_cache = LLMConfigCache(args)
    # Verify that bindings are correctly setup
    if args.llm_binding not in [
        "lollms",
@ -238,10 +290,85 @@ def create_app(args):
    # Create working directory if it doesn't exist
    Path(args.working_dir).mkdir(parents=True, exist_ok=True)
def create_optimized_openai_llm_func(
    config_cache: "LLMConfigCache", args, llm_timeout: int
):
    """Create an optimized OpenAI LLM completion function.

    Uses the pre-parsed options stored on *config_cache* instead of
    re-parsing ``OpenAILLMOptions`` on every request.

    Args:
        config_cache: Cache holding ``openai_llm_options`` (or None).
        args: Parsed server arguments (model name, host, api key).
        llm_timeout: Per-request timeout in seconds.

    Returns:
        An async completion function compatible with LightRAG's llm_model_func.
    """

    async def optimized_openai_alike_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Lazy import to avoid loading the OpenAI client unless used
        from lightrag.llm.openai import openai_complete_if_cache

        # Bug fix: use the declared parameter as the pop() default.  The old
        # code popped with a default of None, which silently discarded a
        # keyword_extraction=True passed as a named argument.
        keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []
        # Use pre-processed configuration to avoid repeated parsing; note the
        # cached options may override "timeout" since update() runs after.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)
        return await openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=args.llm_binding_api_key,
            **kwargs,
        )

    return optimized_openai_alike_model_complete
def create_optimized_azure_openai_llm_func(
    config_cache: "LLMConfigCache", args, llm_timeout: int
):
    """Create an optimized Azure OpenAI LLM completion function.

    Uses the pre-parsed options stored on *config_cache* instead of
    re-parsing ``OpenAILLMOptions`` on every request.

    Args:
        config_cache: Cache holding ``openai_llm_options`` (or None).
        args: Parsed server arguments (model name, host, api key).
        llm_timeout: Per-request timeout in seconds.

    Returns:
        An async completion function compatible with LightRAG's llm_model_func.
    """

    async def optimized_azure_openai_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Lazy import to avoid loading the Azure client unless used
        from lightrag.llm.azure_openai import azure_openai_complete_if_cache

        # Bug fix: use the declared parameter as the pop() default.  The old
        # code popped with a default of None, which silently discarded a
        # keyword_extraction=True passed as a named argument.
        keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []
        # Use pre-processed configuration to avoid repeated parsing; note the
        # cached options may override "timeout" since update() runs after.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)
        return await azure_openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
            api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
            **kwargs,
        )

    return optimized_azure_openai_model_complete
def create_llm_model_func(binding: str): def create_llm_model_func(binding: str):
""" """
Create LLM model function based on binding type. Create LLM model function based on binding type.
Uses lazy import to avoid unnecessary dependencies. Uses optimized functions for OpenAI bindings and lazy import for others.
""" """
try: try:
if binding == "lollms": if binding == "lollms":
@ -255,9 +382,13 @@ def create_app(args):
elif binding == "aws_bedrock": elif binding == "aws_bedrock":
return bedrock_model_complete # Already defined locally return bedrock_model_complete # Already defined locally
elif binding == "azure_openai": elif binding == "azure_openai":
return azure_openai_model_complete # Already defined locally # Use optimized function with pre-processed configuration
return create_optimized_azure_openai_llm_func(
config_cache, args, llm_timeout
)
else: # openai and compatible else: # openai and compatible
return openai_alike_model_complete # Already defined locally # Use optimized function with pre-processed configuration
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
except ImportError as e: except ImportError as e:
raise Exception(f"Failed to import {binding} LLM binding: {e}") raise Exception(f"Failed to import {binding} LLM binding: {e}")
@ -280,15 +411,15 @@ def create_app(args):
raise Exception(f"Failed to import {binding} options: {e}") raise Exception(f"Failed to import {binding} options: {e}")
return {} return {}
def create_embedding_function_with_lazy_import( def create_optimized_embedding_function(
binding, model, host, api_key, dimensions, args config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
): ):
""" """
Create embedding function with lazy imports for all bindings. Create optimized embedding function with pre-processed configuration for applicable bindings.
Replaces the current create_embedding_function with full lazy import support. Uses lazy imports for all bindings and avoids repeated configuration parsing.
""" """
async def embedding_function(texts): async def optimized_embedding_function(texts):
try: try:
if binding == "lollms": if binding == "lollms":
from lightrag.llm.lollms import lollms_embed from lightrag.llm.lollms import lollms_embed
@ -297,10 +428,17 @@ def create_app(args):
texts, embed_model=model, host=host, api_key=api_key texts, embed_model=model, host=host, api_key=api_key
) )
elif binding == "ollama": elif binding == "ollama":
from lightrag.llm.binding_options import OllamaEmbeddingOptions
from lightrag.llm.ollama import ollama_embed from lightrag.llm.ollama import ollama_embed
ollama_options = OllamaEmbeddingOptions.options_dict(args) # Use pre-processed configuration if available, otherwise fallback to dynamic parsing
if config_cache.ollama_embedding_options is not None:
ollama_options = config_cache.ollama_embedding_options
else:
# Fallback for cases where config cache wasn't initialized properly
from lightrag.llm.binding_options import OllamaEmbeddingOptions
ollama_options = OllamaEmbeddingOptions.options_dict(args)
return await ollama_embed( return await ollama_embed(
texts, texts,
embed_model=model, embed_model=model,
@ -331,78 +469,13 @@ def create_app(args):
except ImportError as e: except ImportError as e:
raise Exception(f"Failed to import {binding} embedding: {e}") raise Exception(f"Failed to import {binding} embedding: {e}")
return embedding_function return optimized_embedding_function
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
embedding_timeout = get_env_value(
    "EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
)
async def openai_alike_model_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete *prompt* against an OpenAI-compatible endpoint.

    Closes over ``args`` and ``llm_timeout`` from the enclosing scope.
    NOTE(review): this diff deletes this function in favor of the cached
    factory ``create_optimized_openai_llm_func`` — it re-parses the option
    set on every call.
    """
    # Lazy import
    from lightrag.llm.openai import openai_complete_if_cache
    from lightrag.llm.binding_options import OpenAILLMOptions

    # NOTE(review): the declared ``keyword_extraction`` parameter is always
    # discarded here — a value passed by name binds to the parameter, so this
    # pop() returns None and the flag is effectively ignored.
    keyword_extraction = kwargs.pop("keyword_extraction", None)
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    if history_messages is None:
        history_messages = []
    # Use OpenAI LLM options if available
    # Re-parsed on every request; "timeout" is set before the update, so an
    # explicit option value can override llm_timeout.
    openai_options = OpenAILLMOptions.options_dict(args)
    kwargs["timeout"] = llm_timeout
    kwargs.update(openai_options)
    return await openai_complete_if_cache(
        args.llm_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url=args.llm_binding_host,
        api_key=args.llm_binding_api_key,
        **kwargs,
    )
async def azure_openai_model_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete *prompt* against an Azure OpenAI deployment.

    Closes over ``args`` and ``llm_timeout`` from the enclosing scope.
    NOTE(review): this diff deletes this function in favor of the cached
    factory ``create_optimized_azure_openai_llm_func`` — it re-parses the
    option set on every call.
    """
    # Lazy import
    from lightrag.llm.azure_openai import azure_openai_complete_if_cache
    from lightrag.llm.binding_options import OpenAILLMOptions

    # NOTE(review): same quirk as openai_alike_model_complete — the declared
    # ``keyword_extraction`` parameter is discarded by this pop() with a
    # default of None, so the flag is effectively ignored.
    keyword_extraction = kwargs.pop("keyword_extraction", None)
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    if history_messages is None:
        history_messages = []
    # Use OpenAI LLM options
    # Re-parsed on every request; "timeout" may be overridden by the update.
    openai_options = OpenAILLMOptions.options_dict(args)
    kwargs["timeout"] = llm_timeout
    kwargs.update(openai_options)
    return await azure_openai_complete_if_cache(
        args.llm_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url=args.llm_binding_host,
        # NOTE(review): unlike the replacement, no fallback to
        # args.llm_binding_api_key when the env var is unset.
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
        **kwargs,
    )
async def bedrock_model_complete( async def bedrock_model_complete(
prompt, prompt,
system_prompt=None, system_prompt=None,
@ -430,16 +503,17 @@ def create_app(args):
**kwargs, **kwargs,
) )
# Create embedding function with lazy imports # Create embedding function with optimized configuration
embedding_func = EmbeddingFunc( embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim, embedding_dim=args.embedding_dim,
func=create_embedding_function_with_lazy_import( func=create_optimized_embedding_function(
config_cache=config_cache,
binding=args.embedding_binding, binding=args.embedding_binding,
model=args.embedding_model, model=args.embedding_model,
host=args.embedding_binding_host, host=args.embedding_binding_host,
api_key=args.embedding_binding_api_key, api_key=args.embedding_binding_api_key,
dimensions=args.embedding_dim, dimensions=args.embedding_dim,
args=args, # Pass args object for dynamic option generation args=args, # Pass args object for fallback option generation
), ),
) )