Prohibit direct access to the internal function (`.func`) of EmbeddingFunc; call the EmbeddingFunc object itself instead.

• Fix similarity search error in the query stage
• Remove redundant null checks on the embedding function
• Improve readability of the embedding-dimension log message
This commit is contained in:
yangdx 2025-11-08 01:43:36 +08:00
parent ffeeae4208
commit 03cc6262c4
2 changed files with 32 additions and 36 deletions

View file

@ -723,15 +723,18 @@ def create_app(args):
if args.embedding_binding == "jina": if args.embedding_binding == "jina":
# Jina API requires dimension parameter - always send it # Jina API requires dimension parameter - always send it
send_dimensions = has_embedding_dim_param send_dimensions = has_embedding_dim_param
dimension_control = "forced (Jina API requirement)" dimension_control = "forced by Jina API"
else: else:
# For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting # For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
send_dimensions = embedding_send_dim and has_embedding_dim_param send_dimensions = embedding_send_dim and has_embedding_dim_param
dimension_control = f"env_var={embedding_send_dim}" if send_dimensions or not embedding_send_dim:
dimension_control = "by env var"
else:
dimension_control = "by not hasparam"
logger.info( logger.info(
f"Embedding configuration: send_dimensions={send_dimensions} " f"Send embedding dimension: {send_dimensions} {dimension_control} "
f"({dimension_control}, has_param={has_embedding_dim_param}, " f"(dimensions={args.embedding_dim}, has_param={has_embedding_dim_param}, "
f"binding={args.embedding_binding})" f"binding={args.embedding_binding})"
) )

View file

@ -3425,10 +3425,10 @@ async def _perform_kg_search(
) )
query_embedding = None query_embedding = None
if query and (kg_chunk_pick_method == "VECTOR" or chunks_vdb): if query and (kg_chunk_pick_method == "VECTOR" or chunks_vdb):
embedding_func_config = text_chunks_db.embedding_func actual_embedding_func = text_chunks_db.embedding_func
if embedding_func_config and embedding_func_config.func: if actual_embedding_func:
try: try:
query_embedding = await embedding_func_config.func([query]) query_embedding = await actual_embedding_func([query])
query_embedding = query_embedding[ query_embedding = query_embedding[
0 0
] # Extract first embedding from batch result ] # Extract first embedding from batch result
@ -4336,25 +4336,21 @@ async def _find_related_text_unit_from_entities(
num_of_chunks = int(max_related_chunks * len(entities_with_chunks) / 2) num_of_chunks = int(max_related_chunks * len(entities_with_chunks) / 2)
# Get embedding function from global config # Get embedding function from global config
embedding_func_config = text_chunks_db.embedding_func actual_embedding_func = text_chunks_db.embedding_func
if not embedding_func_config: if not actual_embedding_func:
logger.warning("No embedding function found, falling back to WEIGHT method") logger.warning("No embedding function found, falling back to WEIGHT method")
kg_chunk_pick_method = "WEIGHT" kg_chunk_pick_method = "WEIGHT"
else: else:
try: try:
actual_embedding_func = embedding_func_config.func selected_chunk_ids = await pick_by_vector_similarity(
query=query,
selected_chunk_ids = None text_chunks_storage=text_chunks_db,
if actual_embedding_func: chunks_vdb=chunks_vdb,
selected_chunk_ids = await pick_by_vector_similarity( num_of_chunks=num_of_chunks,
query=query, entity_info=entities_with_chunks,
text_chunks_storage=text_chunks_db, embedding_func=actual_embedding_func,
chunks_vdb=chunks_vdb, query_embedding=query_embedding,
num_of_chunks=num_of_chunks, )
entity_info=entities_with_chunks,
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
if selected_chunk_ids == []: if selected_chunk_ids == []:
kg_chunk_pick_method = "WEIGHT" kg_chunk_pick_method = "WEIGHT"
@ -4629,24 +4625,21 @@ async def _find_related_text_unit_from_relations(
num_of_chunks = int(max_related_chunks * len(relations_with_chunks) / 2) num_of_chunks = int(max_related_chunks * len(relations_with_chunks) / 2)
# Get embedding function from global config # Get embedding function from global config
embedding_func_config = text_chunks_db.embedding_func actual_embedding_func = text_chunks_db.embedding_func
if not embedding_func_config: if not actual_embedding_func:
logger.warning("No embedding function found, falling back to WEIGHT method") logger.warning("No embedding function found, falling back to WEIGHT method")
kg_chunk_pick_method = "WEIGHT" kg_chunk_pick_method = "WEIGHT"
else: else:
try: try:
actual_embedding_func = embedding_func_config.func selected_chunk_ids = await pick_by_vector_similarity(
query=query,
if actual_embedding_func: text_chunks_storage=text_chunks_db,
selected_chunk_ids = await pick_by_vector_similarity( chunks_vdb=chunks_vdb,
query=query, num_of_chunks=num_of_chunks,
text_chunks_storage=text_chunks_db, entity_info=relations_with_chunks,
chunks_vdb=chunks_vdb, embedding_func=actual_embedding_func,
num_of_chunks=num_of_chunks, query_embedding=query_embedding,
entity_info=relations_with_chunks, )
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
if selected_chunk_ids == []: if selected_chunk_ids == []:
kg_chunk_pick_method = "WEIGHT" kg_chunk_pick_method = "WEIGHT"