Merge pull request #2068 from danielaskdd/fix-llm-option

refactor: Smart Configuration Caching and Conditional Logging
commit 8fd7bca8d7
Daniel.y committed 2025-09-05 17:19:44 +08:00 (via GitHub)
3 changed files with 158 additions and 81 deletions


@@ -175,9 +175,8 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai
### OpenAI Specific Parameters
### To mitigate endless output, set the temperature and frequency penalty parameters to a higher value
### To mitigate endless output, set the temperature to a higher value
# OPENAI_LLM_TEMPERATURE=0.8
# OPENAI_FREQUENCY_PENALTY=1.2
### OpenRouter Specific Parameters
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
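For illustration, a minimal sketch of how these variables could be consumed (hedged: the variable names match the example above, but this parsing snippet is not part of the commit):

import json
import os

# OPENAI_LLM_EXTRA_BODY holds a JSON object passed through to the API request body.
extra_body = json.loads(os.getenv("OPENAI_LLM_EXTRA_BODY", "{}"))
# A higher temperature reduces the chance of degenerate, endless output.
temperature = float(os.getenv("OPENAI_LLM_TEMPERATURE", "1.0"))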


@@ -97,11 +97,63 @@ def setup_signal_handlers():
signal.signal(signal.SIGTERM, signal_handler) # kill command
class LLMConfigCache:
"""Smart LLM and Embedding configuration cache class"""
def __init__(self, args):
self.args = args
# Initialize configurations based on binding conditions
self.openai_llm_options = None
self.ollama_llm_options = None
self.ollama_embedding_options = None
# Only initialize and log OpenAI options when using OpenAI-related bindings
if args.llm_binding in ["openai", "azure_openai"]:
from lightrag.llm.binding_options import OpenAILLMOptions
self.openai_llm_options = OpenAILLMOptions.options_dict(args)
logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")
# Only initialize and log Ollama LLM options when using Ollama LLM binding
if args.llm_binding == "ollama":
try:
from lightrag.llm.binding_options import OllamaLLMOptions
self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
except ImportError:
logger.warning(
"OllamaLLMOptions not available, using default configuration"
)
self.ollama_llm_options = {}
# Only initialize and log Ollama Embedding options when using Ollama Embedding binding
if args.embedding_binding == "ollama":
try:
from lightrag.llm.binding_options import OllamaEmbeddingOptions
self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
args
)
logger.info(
f"Ollama Embedding Options: {self.ollama_embedding_options}"
)
except ImportError:
logger.warning(
"OllamaEmbeddingOptions not available, using default configuration"
)
self.ollama_embedding_options = {}
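A minimal sketch of the conditional behavior (assuming an argparse-style Namespace; with bindings that match neither OpenAI nor Ollama, nothing is parsed or logged):

from argparse import Namespace

args = Namespace(llm_binding="lollms", embedding_binding="jina")
cache = LLMConfigCache(args)
assert cache.openai_llm_options is None        # no OpenAI options parsed or logged
assert cache.ollama_llm_options is None        # no Ollama LLM options parsed or logged
assert cache.ollama_embedding_options is None  # no embedding options parsed or logged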
def create_app(args):
# Setup logging
logger.setLevel(args.log_level)
set_verbose_debug(args.verbose)
# Create configuration cache (this will output configuration logs)
config_cache = LLMConfigCache(args)
# Verify that bindings are correctly set up
if args.llm_binding not in [
"lollms",
@@ -238,10 +290,85 @@ def create_app(args):
# Create working directory if it doesn't exist
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
def create_optimized_openai_llm_func(
config_cache: LLMConfigCache, args, llm_timeout: int
):
"""Create optimized OpenAI LLM function with pre-processed configuration"""
async def optimized_openai_alike_model_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
**kwargs,
) -> str:
from lightrag.llm.openai import openai_complete_if_cache
keyword_extraction = kwargs.pop("keyword_extraction", None)
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
# Use pre-processed configuration to avoid repeated parsing
kwargs["timeout"] = llm_timeout
if config_cache.openai_llm_options:
kwargs.update(config_cache.openai_llm_options)
return await openai_complete_if_cache(
args.llm_model,
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
base_url=args.llm_binding_host,
api_key=args.llm_binding_api_key,
**kwargs,
)
return optimized_openai_alike_model_complete
def create_optimized_azure_openai_llm_func(
config_cache: LLMConfigCache, args, llm_timeout: int
):
"""Create optimized Azure OpenAI LLM function with pre-processed configuration"""
async def optimized_azure_openai_model_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
**kwargs,
) -> str:
from lightrag.llm.azure_openai import azure_openai_complete_if_cache
keyword_extraction = kwargs.pop("keyword_extraction", None)
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
# Use pre-processed configuration to avoid repeated parsing
kwargs["timeout"] = llm_timeout
if config_cache.openai_llm_options:
kwargs.update(config_cache.openai_llm_options)
return await azure_openai_complete_if_cache(
args.llm_model,
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
base_url=args.llm_binding_host,
api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
**kwargs,
)
return optimized_azure_openai_model_complete
def create_llm_model_func(binding: str):
"""
Create LLM model function based on binding type.
Uses lazy import to avoid unnecessary dependencies.
Uses optimized functions for OpenAI bindings and lazy import for others.
"""
try:
if binding == "lollms":
@@ -255,9 +382,13 @@ def create_app(args):
elif binding == "aws_bedrock":
return bedrock_model_complete # Already defined locally
elif binding == "azure_openai":
return azure_openai_model_complete # Already defined locally
# Use optimized function with pre-processed configuration
return create_optimized_azure_openai_llm_func(
config_cache, args, llm_timeout
)
else: # openai and compatible
return openai_alike_model_complete # Already defined locally
# Use optimized function with pre-processed configuration
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
except ImportError as e:
raise Exception(f"Failed to import {binding} LLM binding: {e}")
@@ -280,15 +411,15 @@ def create_app(args):
raise Exception(f"Failed to import {binding} options: {e}")
return {}
def create_embedding_function_with_lazy_import(
binding, model, host, api_key, dimensions, args
def create_optimized_embedding_function(
config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
):
"""
Create embedding function with lazy imports for all bindings.
Replaces the current create_embedding_function with full lazy import support.
Create optimized embedding function with pre-processed configuration for applicable bindings.
Uses lazy imports for all bindings and avoids repeated configuration parsing.
"""
async def embedding_function(texts):
async def optimized_embedding_function(texts):
try:
if binding == "lollms":
from lightrag.llm.lollms import lollms_embed
@@ -297,10 +428,17 @@ def create_app(args):
texts, embed_model=model, host=host, api_key=api_key
)
elif binding == "ollama":
from lightrag.llm.binding_options import OllamaEmbeddingOptions
from lightrag.llm.ollama import ollama_embed
ollama_options = OllamaEmbeddingOptions.options_dict(args)
# Use pre-processed configuration if available, otherwise fall back to dynamic parsing
if config_cache.ollama_embedding_options is not None:
ollama_options = config_cache.ollama_embedding_options
else:
# Fallback for cases where config cache wasn't initialized properly
from lightrag.llm.binding_options import OllamaEmbeddingOptions
ollama_options = OllamaEmbeddingOptions.options_dict(args)
return await ollama_embed(
texts,
embed_model=model,
@@ -331,78 +469,13 @@ def create_app(args):
except ImportError as e:
raise Exception(f"Failed to import {binding} embedding: {e}")
return embedding_function
return optimized_embedding_function
llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int)
embedding_timeout = get_env_value(
"EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int
)
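get_env_value reads an environment variable and casts it to the given type, falling back to the default when unset; roughly equivalent to this hedged sketch (the real helper's location and exact semantics may differ):

import os

def get_env_value_sketch(name: str, default, value_type=str):
    # Return the cast environment value, or the default when the variable is unset.
    raw = os.getenv(name)
    return value_type(raw) if raw is not None else default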
async def openai_alike_model_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
**kwargs,
) -> str:
# Lazy import
from lightrag.llm.openai import openai_complete_if_cache
from lightrag.llm.binding_options import OpenAILLMOptions
keyword_extraction = kwargs.pop("keyword_extraction", None)
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
# Use OpenAI LLM options if available
openai_options = OpenAILLMOptions.options_dict(args)
kwargs["timeout"] = llm_timeout
kwargs.update(openai_options)
return await openai_complete_if_cache(
args.llm_model,
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
base_url=args.llm_binding_host,
api_key=args.llm_binding_api_key,
**kwargs,
)
async def azure_openai_model_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
**kwargs,
) -> str:
# Lazy import
from lightrag.llm.azure_openai import azure_openai_complete_if_cache
from lightrag.llm.binding_options import OpenAILLMOptions
keyword_extraction = kwargs.pop("keyword_extraction", None)
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
# Use OpenAI LLM options
openai_options = OpenAILLMOptions.options_dict(args)
kwargs["timeout"] = llm_timeout
kwargs.update(openai_options)
return await azure_openai_complete_if_cache(
args.llm_model,
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
base_url=args.llm_binding_host,
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
**kwargs,
)
async def bedrock_model_complete(
prompt,
system_prompt=None,
@@ -430,16 +503,17 @@ def create_app(args):
**kwargs,
)
# Create embedding function with lazy imports
# Create embedding function with optimized configuration
embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim,
func=create_embedding_function_with_lazy_import(
func=create_optimized_embedding_function(
config_cache=config_cache,
binding=args.embedding_binding,
model=args.embedding_model,
host=args.embedding_binding_host,
api_key=args.embedding_binding_api_key,
dimensions=args.embedding_dim,
args=args, # Pass args object for dynamic option generation
args=args, # Pass args object for fallback option generation
),
)
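Downstream, the wrapper is awaited with a batch of texts; a hedged usage sketch (assuming EmbeddingFunc delegates calls to the wrapped func, as its use elsewhere in LightRAG suggests):

import asyncio

async def demo():
    # Each input text yields one vector of length args.embedding_dim.
    embeddings = await embedding_func(["hello", "world"])
    print(len(embeddings), len(embeddings[0]))

asyncio.run(demo())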


@@ -870,6 +870,10 @@ async def _process_extraction_result(
record = record.replace("<|>>", "<|>")
# fix <<|> with <|>
record = record.replace("<<|>", "<|>")
# fix <.|> with <|>
record = record.replace("<.|>", "<|>")
# fix <|.> with <|>
record = record.replace("<|.>", "<|>")
record_attributes = split_string_by_multi_markers(record, [tuple_delimiter])
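A minimal sketch of what these repairs accomplish before the split (delimiter and record values are illustrative):

tuple_delimiter = "<|>"
record = "entity<|.>Apple<.|>company"
for bad in ("<|>>", "<<|>", "<.|>", "<|.>"):
    record = record.replace(bad, tuple_delimiter)
# record is now "entity<|>Apple<|>company", so the split yields clean fields
fields = record.split(tuple_delimiter)  # ["entity", "Apple", "company"]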