This commit is contained in:
Raphaël MANSUY 2025-12-04 19:14:26 +08:00
parent cacea8ab56
commit 0166a38d01

View file

@ -511,7 +511,9 @@ def create_app(args):
return optimized_azure_openai_model_complete return optimized_azure_openai_model_complete
def create_optimized_gemini_llm_func(config_cache: LLMConfigCache, args): def create_optimized_gemini_llm_func(
config_cache: LLMConfigCache, args, llm_timeout: int
):
"""Create optimized Gemini LLM function with cached configuration""" """Create optimized Gemini LLM function with cached configuration"""
async def optimized_gemini_model_complete( async def optimized_gemini_model_complete(
@ -526,6 +528,8 @@ def create_app(args):
if history_messages is None: if history_messages is None:
history_messages = [] history_messages = []
# Use pre-processed configuration to avoid repeated parsing
kwargs["timeout"] = llm_timeout
if ( if (
config_cache.gemini_llm_options is not None config_cache.gemini_llm_options is not None
and "generation_config" not in kwargs and "generation_config" not in kwargs
@ -567,7 +571,7 @@ def create_app(args):
config_cache, args, llm_timeout config_cache, args, llm_timeout
) )
elif binding == "gemini": elif binding == "gemini":
return create_optimized_gemini_llm_func(config_cache, args) return create_optimized_gemini_llm_func(config_cache, args, llm_timeout)
else: # openai and compatible else: # openai and compatible
# Use optimized function with pre-processed configuration # Use optimized function with pre-processed configuration
return create_optimized_openai_llm_func(config_cache, args, llm_timeout) return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
@ -601,7 +605,7 @@ def create_app(args):
Uses lazy imports for all bindings and avoids repeated configuration parsing. Uses lazy imports for all bindings and avoids repeated configuration parsing.
""" """
async def optimized_embedding_function(texts): async def optimized_embedding_function(texts, embedding_dim=None):
try: try:
if binding == "lollms": if binding == "lollms":
from lightrag.llm.lollms import lollms_embed from lightrag.llm.lollms import lollms_embed
@ -640,13 +644,13 @@ def create_app(args):
from lightrag.llm.jina import jina_embed from lightrag.llm.jina import jina_embed
return await jina_embed( return await jina_embed(
texts, base_url=host, api_key=api_key texts, embedding_dim=embedding_dim, base_url=host, api_key=api_key
) )
else: # openai and compatible else: # openai and compatible
from lightrag.llm.openai import openai_embed from lightrag.llm.openai import openai_embed
return await openai_embed( return await openai_embed(
texts, model=model, base_url=host, api_key=api_key texts, model=model, base_url=host, api_key=api_key, embedding_dim=embedding_dim
) )
except ImportError as e: except ImportError as e:
raise Exception(f"Failed to import {binding} embedding: {e}") raise Exception(f"Failed to import {binding} embedding: {e}")
@ -687,7 +691,7 @@ def create_app(args):
# Create embedding function with optimized configuration # Create embedding function with optimized configuration
import inspect import inspect
# Create the optimized embedding function # Create the optimized embedding function
optimized_embedding_func = create_optimized_embedding_function( optimized_embedding_func = create_optimized_embedding_function(
config_cache=config_cache, config_cache=config_cache,
@ -697,27 +701,33 @@ def create_app(args):
api_key=args.embedding_binding_api_key, api_key=args.embedding_binding_api_key,
args=args, # Pass args object for fallback option generation args=args, # Pass args object for fallback option generation
) )
# Check environment variable for sending dimensions # Check environment variable for sending dimensions
embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true" embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
# Check if the function signature has embedding_dim parameter # Check if the function signature has embedding_dim parameter
# Note: Since optimized_embedding_func is an async function, inspect its signature # Note: Since optimized_embedding_func is an async function, inspect its signature
sig = inspect.signature(optimized_embedding_func) sig = inspect.signature(optimized_embedding_func)
has_embedding_dim_param = 'embedding_dim' in sig.parameters has_embedding_dim_param = "embedding_dim" in sig.parameters
# Determine send_dimensions value # Determine send_dimensions value based on binding type
# Only send dimensions if both conditions are met: # Jina REQUIRES dimension parameter (forced to True)
# 1. EMBEDDING_SEND_DIM environment variable is true # OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable
# 2. The function has embedding_dim parameter if args.embedding_binding == "jina":
send_dimensions = embedding_send_dim and has_embedding_dim_param # Jina API requires dimension parameter - always send it
send_dimensions = has_embedding_dim_param
dimension_control = "forced (Jina API requirement)"
else:
# For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
send_dimensions = embedding_send_dim and has_embedding_dim_param
dimension_control = f"env_var={embedding_send_dim}"
logger.info( logger.info(
f"Embedding configuration: send_dimensions={send_dimensions} " f"Embedding configuration: send_dimensions={send_dimensions} "
f"(env_var={embedding_send_dim}, has_param={has_embedding_dim_param}, " f"({dimension_control}, has_param={has_embedding_dim_param}, "
f"binding={args.embedding_binding})" f"binding={args.embedding_binding})"
) )
# Create EmbeddingFunc with send_dimensions attribute # Create EmbeddingFunc with send_dimensions attribute
embedding_func = EmbeddingFunc( embedding_func = EmbeddingFunc(
embedding_dim=args.embedding_dim, embedding_dim=args.embedding_dim,