Optimize LLM/embedding config caching to reduce repeated parsing overhead

• Add LLMConfigCache class for smart caching
• Pre-process OpenAI/Ollama configurations
• Create optimized function factories
• Reduce redundant option parsing calls
This commit is contained in:
yangdx 2025-09-05 16:36:08 +08:00
parent dc14623b42
commit a1df76a4ea

View file

@ -97,11 +97,63 @@ def setup_signal_handlers():
signal.signal(signal.SIGTERM, signal_handler)  # kill command
class LLMConfigCache:
    """Smart LLM and Embedding configuration cache class.

    Parses binding options once at construction time (logging the result) so
    request handlers do not re-parse them on every call.  Attributes stay
    ``None`` for bindings that are not selected in *args*.
    """

    def __init__(self, args):
        # Keep a reference for consumers that need raw argument access later.
        self.args = args

        # ``None`` marks "binding not in use / not initialized".
        self.openai_llm_options = None
        self.ollama_llm_options = None
        self.ollama_embedding_options = None

        llm_binding = args.llm_binding

        # OpenAI and Azure OpenAI share one option set; parse and log it once.
        if llm_binding in ("openai", "azure_openai"):
            from lightrag.llm.binding_options import OpenAILLMOptions

            self.openai_llm_options = OpenAILLMOptions.options_dict(args)
            logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")

        # Ollama LLM options are optional: fall back to {} if unavailable.
        if llm_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaLLMOptions

                self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
                logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
            except ImportError:
                logger.warning(
                    "OllamaLLMOptions not available, using default configuration"
                )
                self.ollama_llm_options = {}

        # Same optional treatment for the Ollama embedding binding.
        if args.embedding_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaEmbeddingOptions

                self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
                    args
                )
                logger.info(
                    f"Ollama Embedding Options: {self.ollama_embedding_options}"
                )
            except ImportError:
                logger.warning(
                    "OllamaEmbeddingOptions not available, using default configuration"
                )
                self.ollama_embedding_options = {}
def create_app(args):
    # Setup logging
    logger.setLevel(args.log_level)
    set_verbose_debug(args.verbose)
# Create configuration cache (this will output configuration logs)
config_cache = LLMConfigCache(args)
    # Verify that bindings are correctly setup
    if args.llm_binding not in [
        "lollms",
@ -238,10 +290,85 @@ def create_app(args):
    # Create working directory if it doesn't exist
    Path(args.working_dir).mkdir(parents=True, exist_ok=True)
def create_optimized_openai_llm_func(
    config_cache: "LLMConfigCache", args, llm_timeout: int
):
    """Create an optimized OpenAI LLM completion function.

    Uses the pre-parsed options stored on *config_cache* instead of
    re-parsing ``OpenAILLMOptions`` on every request.

    Args:
        config_cache: Cache holding ``openai_llm_options`` (or None).
        args: Parsed server arguments (model name, host, api key).
        llm_timeout: Per-request timeout in seconds.

    Returns:
        An async completion function compatible with LightRAG's llm_model_func.
    """

    async def optimized_openai_alike_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Lazy import to avoid loading the OpenAI client unless used
        from lightrag.llm.openai import openai_complete_if_cache

        # Bug fix: use the declared parameter as the pop() default.  The old
        # code popped with a default of None, which silently discarded a
        # keyword_extraction=True passed as a named argument.
        keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []
        # Use pre-processed configuration to avoid repeated parsing; note the
        # cached options may override "timeout" since update() runs after.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)
        return await openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=args.llm_binding_api_key,
            **kwargs,
        )

    return optimized_openai_alike_model_complete
def create_optimized_azure_openai_llm_func(
    config_cache: "LLMConfigCache", args, llm_timeout: int
):
    """Create an optimized Azure OpenAI LLM completion function.

    Uses the pre-parsed options stored on *config_cache* instead of
    re-parsing ``OpenAILLMOptions`` on every request.

    Args:
        config_cache: Cache holding ``openai_llm_options`` (or None).
        args: Parsed server arguments (model name, host, api key).
        llm_timeout: Per-request timeout in seconds.

    Returns:
        An async completion function compatible with LightRAG's llm_model_func.
    """

    async def optimized_azure_openai_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Lazy import to avoid loading the Azure client unless used
        from lightrag.llm.azure_openai import azure_openai_complete_if_cache

        # Bug fix: use the declared parameter as the pop() default.  The old
        # code popped with a default of None, which silently discarded a
        # keyword_extraction=True passed as a named argument.
        keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []
        # Use pre-processed configuration to avoid repeated parsing; note the
        # cached options may override "timeout" since update() runs after.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)
        return await azure_openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
            api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
            **kwargs,
        )

    return optimized_azure_openai_model_complete
def create_llm_model_func(binding: str): def create_llm_model_func(binding: str):
""" """
Create LLM model function based on binding type. Create LLM model function based on binding type.
Uses lazy import to avoid unnecessary dependencies. Uses optimized functions for OpenAI bindings and lazy import for others.
""" """
try: try:
if binding == "lollms": if binding == "lollms":
@ -255,9 +382,13 @@ def create_app(args):
elif binding == "aws_bedrock": elif binding == "aws_bedrock":
return bedrock_model_complete # Already defined locally return bedrock_model_complete # Already defined locally
elif binding == "azure_openai": elif binding == "azure_openai":
return azure_openai_model_complete # Already defined locally # Use optimized function with pre-processed configuration
return create_optimized_azure_openai_llm_func(
config_cache, args, llm_timeout
)
else: # openai and compatible else: # openai and compatible
return openai_alike_model_complete # Already defined locally # Use optimized function with pre-processed configuration
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
except ImportError as e: except ImportError as e:
raise Exception(f"Failed to import {binding} LLM binding: {e}") raise Exception(f"Failed to import {binding} LLM binding: {e}")
@ -280,15 +411,15 @@ def create_app(args):
raise Exception(f"Failed to import {binding} options: {e}") raise Exception(f"Failed to import {binding} options: {e}")
return {} return {}
def create_embedding_function_with_lazy_import( def create_optimized_embedding_function(
binding, model, host, api_key, dimensions, args config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
): ):
""" """
Create embedding function with lazy imports for all bindings. Create optimized embedding function with pre-processed configuration for applicable bindings.
Replaces the current create_embedding_function with full lazy import support. Uses lazy imports for all bindings and avoids repeated configuration parsing.
""" """
async def embedding_function(texts): async def optimized_embedding_function(texts):
try: try:
if binding == "lollms": if binding == "lollms":
from lightrag.llm.lollms import lollms_embed from lightrag.llm.lollms import lollms_embed
@ -297,10 +428,17 @@ def create_app(args):
texts, embed_model=model, host=host, api_key=api_key texts, embed_model=model, host=host, api_key=api_key
) )
elif binding == "ollama": elif binding == "ollama":
from lightrag.llm.binding_options import OllamaEmbeddingOptions
from lightrag.llm.ollama import ollama_embed from lightrag.llm.ollama import ollama_embed
ollama_options = OllamaEmbeddingOptions.options_dict(args) # Use pre-processed configuration if available, otherwise fallback to dynamic parsing
if config_cache.ollama_embedding_options is not None:
ollama_options = config_cache.ollama_embedding_options
else:
# Fallback for cases where config cache wasn't initialized properly
from lightrag.llm.binding_options import OllamaEmbeddingOptions
ollama_options = OllamaEmbeddingOptions.options_dict(args)
return await ollama_embed( return await ollama_embed(
texts, texts,
embed_model=model, embed_model=model,
@ -331,78 +469,13 @@ def create_app(args):
except ImportError as e: except ImportError as e:
raise Exception(f"Failed to import {binding} embedding: {e}") raise Exception(f"Failed to import {binding} embedding: {e}")
return embedding_function return optimized_embedding_function
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
embedding_timeout = get_env_value(
    "EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
)
async def openai_alike_model_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete *prompt* against an OpenAI-compatible endpoint.

    Closes over ``args`` and ``llm_timeout`` from the enclosing scope.
    NOTE(review): this diff deletes this function in favor of the cached
    factory ``create_optimized_openai_llm_func`` — it re-parses the option
    set on every call.
    """
    # Lazy import
    from lightrag.llm.openai import openai_complete_if_cache
    from lightrag.llm.binding_options import OpenAILLMOptions

    # NOTE(review): the declared ``keyword_extraction`` parameter is always
    # discarded here — a value passed by name binds to the parameter, so this
    # pop() returns None and the flag is effectively ignored.
    keyword_extraction = kwargs.pop("keyword_extraction", None)
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    if history_messages is None:
        history_messages = []
    # Use OpenAI LLM options if available
    # Re-parsed on every request; "timeout" is set before the update, so an
    # explicit option value can override llm_timeout.
    openai_options = OpenAILLMOptions.options_dict(args)
    kwargs["timeout"] = llm_timeout
    kwargs.update(openai_options)
    return await openai_complete_if_cache(
        args.llm_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url=args.llm_binding_host,
        api_key=args.llm_binding_api_key,
        **kwargs,
    )
async def azure_openai_model_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete *prompt* against an Azure OpenAI deployment.

    Closes over ``args`` and ``llm_timeout`` from the enclosing scope.
    NOTE(review): this diff deletes this function in favor of the cached
    factory ``create_optimized_azure_openai_llm_func`` — it re-parses the
    option set on every call.
    """
    # Lazy import
    from lightrag.llm.azure_openai import azure_openai_complete_if_cache
    from lightrag.llm.binding_options import OpenAILLMOptions

    # NOTE(review): same quirk as openai_alike_model_complete — the declared
    # ``keyword_extraction`` parameter is discarded by this pop() with a
    # default of None, so the flag is effectively ignored.
    keyword_extraction = kwargs.pop("keyword_extraction", None)
    if keyword_extraction:
        kwargs["response_format"] = GPTKeywordExtractionFormat
    if history_messages is None:
        history_messages = []
    # Use OpenAI LLM options
    # Re-parsed on every request; "timeout" may be overridden by the update.
    openai_options = OpenAILLMOptions.options_dict(args)
    kwargs["timeout"] = llm_timeout
    kwargs.update(openai_options)
    return await azure_openai_complete_if_cache(
        args.llm_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url=args.llm_binding_host,
        # NOTE(review): unlike the replacement, no fallback to
        # args.llm_binding_api_key when the env var is unset.
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
        **kwargs,
    )
async def bedrock_model_complete( async def bedrock_model_complete(
prompt, prompt,
system_prompt=None, system_prompt=None,
@ -430,16 +503,17 @@ def create_app(args):
**kwargs, **kwargs,
) )
# Create embedding function with lazy imports # Create embedding function with optimized configuration
embedding_func = EmbeddingFunc( embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim, embedding_dim=args.embedding_dim,
func=create_embedding_function_with_lazy_import( func=create_optimized_embedding_function(
config_cache=config_cache,
binding=args.embedding_binding, binding=args.embedding_binding,
model=args.embedding_model, model=args.embedding_model,
host=args.embedding_binding_host, host=args.embedding_binding_host,
api_key=args.embedding_binding_api_key, api_key=args.embedding_binding_api_key,
dimensions=args.embedding_dim, dimensions=args.embedding_dim,
args=args, # Pass args object for dynamic option generation args=args, # Pass args object for fallback option generation
), ),
) )