Add mandatory dimension parameter handling for Jina API compliance

yangdx 2025-11-07 21:08:34 +08:00
parent d8a6355e41
commit c14f25b7f8
3 changed files with 36 additions and 26 deletions


@@ -244,9 +244,10 @@ OLLAMA_LLM_NUM_CTX=32768
 # EMBEDDING_TIMEOUT=30
 ### Control whether to send embedding_dim parameter to embedding API
-### Set to 'true' to enable dynamic dimension adjustment (only works for OpenAI and Jina)
-### Set to 'false' (default) to disable sending dimension parameter
-### Note: This is automatically ignored for backends that don't support dimension parameter
+### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
+### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
+### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
 # EMBEDDING_SEND_DIM=false
 EMBEDDING_BINDING=ollama
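
To make the intended semantics concrete, here is a minimal sketch of how the settings above are expected to combine, mirroring the selection logic added later in this commit; the helper resolve_send_dimensions and its arguments are illustrative only, not part of LightRAG:

import os

def resolve_send_dimensions(binding: str, func_accepts_dim: bool) -> bool:
    # Hypothetical helper mirroring the commit's rules:
    # - Jina always sends the dimension parameter (API requirement)
    # - other bindings follow EMBEDDING_SEND_DIM
    # - backends whose embedding function has no embedding_dim parameter never send it
    send_dim_env = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
    if binding == "jina":
        return func_accepts_dim
    return send_dim_env and func_accepts_dim

# Example: with EMBEDDING_SEND_DIM unset, only Jina ends up sending dimensions
print(resolve_send_dimensions("jina", True))    # True
print(resolve_send_dimensions("openai", True))  # False unless EMBEDDING_SEND_DIM=true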


@@ -643,9 +643,7 @@ def create_app(args):
 elif binding == "jina":
     from lightrag.llm.jina import jina_embed
-    return await jina_embed(
-        texts, base_url=host, api_key=api_key
-    )
+    return await jina_embed(texts, base_url=host, api_key=api_key)
 else:  # openai and compatible
     from lightrag.llm.openai import openai_embed
@@ -691,7 +689,7 @@ def create_app(args):
 # Create embedding function with optimized configuration
 import inspect
 # Create the optimized embedding function
 optimized_embedding_func = create_optimized_embedding_function(
     config_cache=config_cache,
@@ -701,27 +699,33 @@ def create_app(args):
     api_key=args.embedding_binding_api_key,
     args=args,  # Pass args object for fallback option generation
 )
 # Check environment variable for sending dimensions
 embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
 # Check if the function signature has embedding_dim parameter
 # Note: Since optimized_embedding_func is an async function, inspect its signature
 sig = inspect.signature(optimized_embedding_func)
-has_embedding_dim_param = 'embedding_dim' in sig.parameters
-# Determine send_dimensions value
-# Only send dimensions if both conditions are met:
-# 1. EMBEDDING_SEND_DIM environment variable is true
-# 2. The function has embedding_dim parameter
-send_dimensions = embedding_send_dim and has_embedding_dim_param
+has_embedding_dim_param = "embedding_dim" in sig.parameters
+# Determine send_dimensions value based on binding type
+# Jina REQUIRES dimension parameter (forced to True)
+# OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable
+if args.embedding_binding == "jina":
+    # Jina API requires dimension parameter - always send it
+    send_dimensions = has_embedding_dim_param
+    dimension_control = "forced (Jina API requirement)"
+else:
+    # For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
+    send_dimensions = embedding_send_dim and has_embedding_dim_param
+    dimension_control = f"env_var={embedding_send_dim}"
 logger.info(
     f"Embedding configuration: send_dimensions={send_dimensions} "
-    f"(env_var={embedding_send_dim}, has_param={has_embedding_dim_param}, "
+    f"({dimension_control}, has_param={has_embedding_dim_param}, "
     f"binding={args.embedding_binding})"
 )
 # Create EmbeddingFunc with send_dimensions attribute
 embedding_func = EmbeddingFunc(
     embedding_dim=args.embedding_dim,
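
The has_embedding_dim_param gate above depends on inspect.signature reporting the parameters of the (async) embedding function; a small self-contained illustration, with made-up function names, assuming the wrapper exposes a plain embedding_dim keyword:

import inspect

async def embed_with_dim(texts, embedding_dim=None):  # hypothetical wrapper that accepts a dimension
    ...

async def embed_plain(texts):  # hypothetical wrapper without dimension support
    ...

print("embedding_dim" in inspect.signature(embed_with_dim).parameters)  # True
print("embedding_dim" in inspect.signature(embed_plain).parameters)     # False

If the wrapper only exposed *args/**kwargs, the check would report False and dimensions would never be sent, even for the Jina binding.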


@@ -353,24 +353,29 @@ class EmbeddingFunc:
     embedding_dim: int
     func: callable
     max_token_size: int | None = None  # deprecated, kept for compatibility only
-    send_dimensions: bool = False  # Control whether to send embedding_dim to the function
+    send_dimensions: bool = (
+        False  # Control whether to send embedding_dim to the function
+    )

     async def __call__(self, *args, **kwargs) -> np.ndarray:
         # Only inject embedding_dim when send_dimensions is True
         if self.send_dimensions:
             # Check if user provided embedding_dim parameter
-            if 'embedding_dim' in kwargs:
-                user_provided_dim = kwargs['embedding_dim']
+            if "embedding_dim" in kwargs:
+                user_provided_dim = kwargs["embedding_dim"]
                 # If user's value differs from class attribute, output warning
-                if user_provided_dim is not None and user_provided_dim != self.embedding_dim:
+                if (
+                    user_provided_dim is not None
+                    and user_provided_dim != self.embedding_dim
+                ):
                     logger.warning(
                         f"Ignoring user-provided embedding_dim={user_provided_dim}, "
                         f"using declared embedding_dim={self.embedding_dim} from decorator"
                     )
             # Inject embedding_dim from decorator
-            kwargs['embedding_dim'] = self.embedding_dim
+            kwargs["embedding_dim"] = self.embedding_dim
         return await self.func(*args, **kwargs)
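
A usage sketch of the behaviour above, assuming EmbeddingFunc can be constructed with keyword arguments as it is in create_app earlier in this commit; fake_embed is a stand-in backend, not a real LightRAG embedding function:

import asyncio
import numpy as np

async def fake_embed(texts, embedding_dim=None):
    # Stand-in backend: returns zero vectors of the requested (or a default) dimension
    dim = embedding_dim or 8
    return np.zeros((len(texts), dim))

# With send_dimensions=True, __call__ injects embedding_dim=1024 into fake_embed;
# a caller-supplied embedding_dim that differs would be overridden with a warning.
wrapper = EmbeddingFunc(embedding_dim=1024, func=fake_embed, send_dimensions=True)
vectors = asyncio.run(wrapper(["hello", "world"]))
print(vectors.shape)  # (2, 1024)

# With send_dimensions=False (the default), fake_embed falls back to its own default.
plain = EmbeddingFunc(embedding_dim=1024, func=fake_embed)
print(asyncio.run(plain(["hello"])).shape)  # (1, 8)

In the server path shown earlier this flag is computed automatically (and forced on for Jina), so callers normally never set it by hand.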