Optimize LLM/embedding config caching to reduce repeated parsing overhead
• Add LLMConfigCache class for smart caching
• Pre-process OpenAI/Ollama configurations
• Create optimized function factories
• Reduce redundant option parsing calls
This commit is contained in:
parent
dc14623b42
commit
a1df76a4ea
1 changed files with 153 additions and 79 deletions
|
|
@ -97,11 +97,63 @@ def setup_signal_handlers():
|
||||||
signal.signal(signal.SIGTERM, signal_handler) # kill command
|
signal.signal(signal.SIGTERM, signal_handler) # kill command
|
||||||
|
|
||||||
|
|
||||||
|
class LLMConfigCache:
    """Holds binding options parsed once from ``args`` for later reuse.

    Each ``*_options`` attribute stays ``None`` unless the matching binding is
    selected in ``args``; a selected Ollama binding whose options class cannot
    be imported ends up with an empty dict instead.
    """

    def __init__(self, args):
        """Parse and log the options for the bindings selected in ``args``.

        Args:
            args: Object exposing ``llm_binding`` and ``embedding_binding``;
                it is also handed to the ``options_dict`` class methods.
        """
        self.args = args

        # Start with every slot empty; only the active bindings get filled in.
        self.openai_llm_options = None
        self.ollama_llm_options = None
        self.ollama_embedding_options = None

        llm_binding = args.llm_binding

        # OpenAI and Azure OpenAI share the same option set.
        if llm_binding in ("openai", "azure_openai"):
            self.openai_llm_options = self._load_openai_llm_options()

        if llm_binding == "ollama":
            self.ollama_llm_options = self._load_ollama_llm_options()

        if args.embedding_binding == "ollama":
            self.ollama_embedding_options = self._load_ollama_embedding_options()

    def _load_openai_llm_options(self):
        """Return the parsed OpenAI LLM options after logging them."""
        from lightrag.llm.binding_options import OpenAILLMOptions

        options = OpenAILLMOptions.options_dict(self.args)
        logger.info(f"OpenAI LLM Options: {options}")
        return options

    def _load_ollama_llm_options(self):
        """Return the parsed Ollama LLM options, or ``{}`` on ImportError."""
        try:
            from lightrag.llm.binding_options import OllamaLLMOptions

            options = OllamaLLMOptions.options_dict(self.args)
            logger.info(f"Ollama LLM Options: {options}")
        except ImportError:
            logger.warning(
                "OllamaLLMOptions not available, using default configuration"
            )
            options = {}
        return options

    def _load_ollama_embedding_options(self):
        """Return the parsed Ollama embedding options, or ``{}`` on ImportError."""
        try:
            from lightrag.llm.binding_options import OllamaEmbeddingOptions

            options = OllamaEmbeddingOptions.options_dict(self.args)
            logger.info(f"Ollama Embedding Options: {options}")
        except ImportError:
            logger.warning(
                "OllamaEmbeddingOptions not available, using default configuration"
            )
            options = {}
        return options
|
||||||
|
|
||||||
|
|
||||||
def create_app(args):
|
def create_app(args):
|
||||||
# Setup logging
|
# Setup logging
|
||||||
logger.setLevel(args.log_level)
|
logger.setLevel(args.log_level)
|
||||||
set_verbose_debug(args.verbose)
|
set_verbose_debug(args.verbose)
|
||||||
|
|
||||||
|
# Create configuration cache (this will output configuration logs)
|
||||||
|
config_cache = LLMConfigCache(args)
|
||||||
|
|
||||||
# Verify that bindings are correctly setup
|
# Verify that bindings are correctly setup
|
||||||
if args.llm_binding not in [
|
if args.llm_binding not in [
|
||||||
"lollms",
|
"lollms",
|
||||||
|
|
@ -238,10 +290,85 @@ def create_app(args):
|
||||||
# Create working directory if it doesn't exist
|
# Create working directory if it doesn't exist
|
||||||
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
|
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def create_optimized_openai_llm_func(
    config_cache: LLMConfigCache, args, llm_timeout: int
):
    """Build the OpenAI-compatible completion coroutine using cached options.

    Args:
        config_cache: Cache whose ``openai_llm_options`` (a dict, or a falsy
            value when nothing was pre-parsed) is merged into every request.
        args: Server arguments; ``llm_model``, ``llm_binding_host`` and
            ``llm_binding_api_key`` are read on each call.
        llm_timeout: Value sent as the ``timeout`` keyword of each request.

    Returns:
        An async callable with the signature ``(prompt, system_prompt=None,
        history_messages=None, keyword_extraction=False, **kwargs)`` that
        awaits ``openai_complete_if_cache`` and returns its result.
    """

    async def optimized_openai_alike_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Imported on first use so the binding's module is only loaded when
        # this completion function is actually called.
        from lightrag.llm.openai import openai_complete_if_cache

        # ``keyword_extraction`` is a named parameter, so it can never appear
        # in **kwargs. The previous ``kwargs.pop("keyword_extraction", None)``
        # therefore always replaced the caller's value with None and the
        # response format below was never applied.
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []

        # Set the timeout first: a cached option with the same key overrides
        # it because ``update`` runs afterwards.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)

        return await openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=args.llm_binding_api_key,
            **kwargs,
        )

    return optimized_openai_alike_model_complete
|
||||||
|
|
||||||
|
def create_optimized_azure_openai_llm_func(
    config_cache: LLMConfigCache, args, llm_timeout: int
):
    """Build the Azure OpenAI completion coroutine using cached options.

    Args:
        config_cache: Cache whose ``openai_llm_options`` (a dict, or a falsy
            value when nothing was pre-parsed) is merged into every request.
        args: Server arguments; ``llm_model``, ``llm_binding_host`` and
            ``llm_binding_api_key`` are read on each call.
        llm_timeout: Value sent as the ``timeout`` keyword of each request.

    Returns:
        An async callable with the signature ``(prompt, system_prompt=None,
        history_messages=None, keyword_extraction=False, **kwargs)`` that
        awaits ``azure_openai_complete_if_cache`` and returns its result.
    """

    async def optimized_azure_openai_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        # Imported on first use so the binding's module is only loaded when
        # this completion function is actually called.
        from lightrag.llm.azure_openai import azure_openai_complete_if_cache

        # ``keyword_extraction`` is a named parameter, so it can never appear
        # in **kwargs. The previous ``kwargs.pop("keyword_extraction", None)``
        # therefore always replaced the caller's value with None and the
        # response format below was never applied.
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []

        # Set the timeout first: a cached option with the same key overrides
        # it because ``update`` runs afterwards.
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)

        return await azure_openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            # The environment variable wins; the CLI/API key is the fallback.
            api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
            api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
            **kwargs,
        )

    return optimized_azure_openai_model_complete
|
||||||
|
|
||||||
def create_llm_model_func(binding: str):
|
def create_llm_model_func(binding: str):
|
||||||
"""
|
"""
|
||||||
Create LLM model function based on binding type.
|
Create LLM model function based on binding type.
|
||||||
Uses lazy import to avoid unnecessary dependencies.
|
Uses optimized functions for OpenAI bindings and lazy import for others.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if binding == "lollms":
|
if binding == "lollms":
|
||||||
|
|
@ -255,9 +382,13 @@ def create_app(args):
|
||||||
elif binding == "aws_bedrock":
|
elif binding == "aws_bedrock":
|
||||||
return bedrock_model_complete # Already defined locally
|
return bedrock_model_complete # Already defined locally
|
||||||
elif binding == "azure_openai":
|
elif binding == "azure_openai":
|
||||||
return azure_openai_model_complete # Already defined locally
|
# Use optimized function with pre-processed configuration
|
||||||
|
return create_optimized_azure_openai_llm_func(
|
||||||
|
config_cache, args, llm_timeout
|
||||||
|
)
|
||||||
else: # openai and compatible
|
else: # openai and compatible
|
||||||
return openai_alike_model_complete # Already defined locally
|
# Use optimized function with pre-processed configuration
|
||||||
|
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise Exception(f"Failed to import {binding} LLM binding: {e}")
|
raise Exception(f"Failed to import {binding} LLM binding: {e}")
|
||||||
|
|
||||||
|
|
@ -280,15 +411,15 @@ def create_app(args):
|
||||||
raise Exception(f"Failed to import {binding} options: {e}")
|
raise Exception(f"Failed to import {binding} options: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def create_embedding_function_with_lazy_import(
|
def create_optimized_embedding_function(
|
||||||
binding, model, host, api_key, dimensions, args
|
config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create embedding function with lazy imports for all bindings.
|
Create optimized embedding function with pre-processed configuration for applicable bindings.
|
||||||
Replaces the current create_embedding_function with full lazy import support.
|
Uses lazy imports for all bindings and avoids repeated configuration parsing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def embedding_function(texts):
|
async def optimized_embedding_function(texts):
|
||||||
try:
|
try:
|
||||||
if binding == "lollms":
|
if binding == "lollms":
|
||||||
from lightrag.llm.lollms import lollms_embed
|
from lightrag.llm.lollms import lollms_embed
|
||||||
|
|
@ -297,10 +428,17 @@ def create_app(args):
|
||||||
texts, embed_model=model, host=host, api_key=api_key
|
texts, embed_model=model, host=host, api_key=api_key
|
||||||
)
|
)
|
||||||
elif binding == "ollama":
|
elif binding == "ollama":
|
||||||
from lightrag.llm.binding_options import OllamaEmbeddingOptions
|
|
||||||
from lightrag.llm.ollama import ollama_embed
|
from lightrag.llm.ollama import ollama_embed
|
||||||
|
|
||||||
ollama_options = OllamaEmbeddingOptions.options_dict(args)
|
# Use pre-processed configuration if available, otherwise fallback to dynamic parsing
|
||||||
|
if config_cache.ollama_embedding_options is not None:
|
||||||
|
ollama_options = config_cache.ollama_embedding_options
|
||||||
|
else:
|
||||||
|
# Fallback for cases where config cache wasn't initialized properly
|
||||||
|
from lightrag.llm.binding_options import OllamaEmbeddingOptions
|
||||||
|
|
||||||
|
ollama_options = OllamaEmbeddingOptions.options_dict(args)
|
||||||
|
|
||||||
return await ollama_embed(
|
return await ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model=model,
|
embed_model=model,
|
||||||
|
|
@ -331,78 +469,13 @@ def create_app(args):
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise Exception(f"Failed to import {binding} embedding: {e}")
|
raise Exception(f"Failed to import {binding} embedding: {e}")
|
||||||
|
|
||||||
return embedding_function
|
return optimized_embedding_function
|
||||||
|
|
||||||
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
|
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
|
||||||
embedding_timeout = get_env_value(
|
embedding_timeout = get_env_value(
|
||||||
"EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
|
"EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
|
||||||
)
|
)
|
||||||
|
|
||||||
async def openai_alike_model_complete(
|
|
||||||
prompt,
|
|
||||||
system_prompt=None,
|
|
||||||
history_messages=None,
|
|
||||||
keyword_extraction=False,
|
|
||||||
**kwargs,
|
|
||||||
) -> str:
|
|
||||||
# Lazy import
|
|
||||||
from lightrag.llm.openai import openai_complete_if_cache
|
|
||||||
from lightrag.llm.binding_options import OpenAILLMOptions
|
|
||||||
|
|
||||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
|
||||||
if keyword_extraction:
|
|
||||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
|
||||||
if history_messages is None:
|
|
||||||
history_messages = []
|
|
||||||
|
|
||||||
# Use OpenAI LLM options if available
|
|
||||||
openai_options = OpenAILLMOptions.options_dict(args)
|
|
||||||
kwargs["timeout"] = llm_timeout
|
|
||||||
kwargs.update(openai_options)
|
|
||||||
|
|
||||||
return await openai_complete_if_cache(
|
|
||||||
args.llm_model,
|
|
||||||
prompt,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
history_messages=history_messages,
|
|
||||||
base_url=args.llm_binding_host,
|
|
||||||
api_key=args.llm_binding_api_key,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def azure_openai_model_complete(
|
|
||||||
prompt,
|
|
||||||
system_prompt=None,
|
|
||||||
history_messages=None,
|
|
||||||
keyword_extraction=False,
|
|
||||||
**kwargs,
|
|
||||||
) -> str:
|
|
||||||
# Lazy import
|
|
||||||
from lightrag.llm.azure_openai import azure_openai_complete_if_cache
|
|
||||||
from lightrag.llm.binding_options import OpenAILLMOptions
|
|
||||||
|
|
||||||
keyword_extraction = kwargs.pop("keyword_extraction", None)
|
|
||||||
if keyword_extraction:
|
|
||||||
kwargs["response_format"] = GPTKeywordExtractionFormat
|
|
||||||
if history_messages is None:
|
|
||||||
history_messages = []
|
|
||||||
|
|
||||||
# Use OpenAI LLM options
|
|
||||||
openai_options = OpenAILLMOptions.options_dict(args)
|
|
||||||
kwargs["timeout"] = llm_timeout
|
|
||||||
kwargs.update(openai_options)
|
|
||||||
|
|
||||||
return await azure_openai_complete_if_cache(
|
|
||||||
args.llm_model,
|
|
||||||
prompt,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
history_messages=history_messages,
|
|
||||||
base_url=args.llm_binding_host,
|
|
||||||
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
|
||||||
api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def bedrock_model_complete(
|
async def bedrock_model_complete(
|
||||||
prompt,
|
prompt,
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
|
|
@ -430,16 +503,17 @@ def create_app(args):
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create embedding function with lazy imports
|
# Create embedding function with optimized configuration
|
||||||
embedding_func = EmbeddingFunc(
|
embedding_func = EmbeddingFunc(
|
||||||
embedding_dim=args.embedding_dim,
|
embedding_dim=args.embedding_dim,
|
||||||
func=create_embedding_function_with_lazy_import(
|
func=create_optimized_embedding_function(
|
||||||
|
config_cache=config_cache,
|
||||||
binding=args.embedding_binding,
|
binding=args.embedding_binding,
|
||||||
model=args.embedding_model,
|
model=args.embedding_model,
|
||||||
host=args.embedding_binding_host,
|
host=args.embedding_binding_host,
|
||||||
api_key=args.embedding_binding_api_key,
|
api_key=args.embedding_binding_api_key,
|
||||||
dimensions=args.embedding_dim,
|
dimensions=args.embedding_dim,
|
||||||
args=args, # Pass args object for dynamic option generation
|
args=args, # Pass args object for fallback option generation
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue