Prohibit direct access to the internal `func` attribute of EmbeddingFunc; call the EmbeddingFunc object itself instead.

• Fix similarity search error in query stage
• Remove redundant null checks
• Improve log readability
This commit is contained in:
yangdx 2025-11-08 01:43:36 +08:00
parent ffeeae4208
commit 03cc6262c4
2 changed files with 32 additions and 36 deletions

View file

@ -723,15 +723,18 @@ def create_app(args):
if args.embedding_binding == "jina":
# Jina API requires dimension parameter - always send it
send_dimensions = has_embedding_dim_param
dimension_control = "forced (Jina API requirement)"
dimension_control = "forced by Jina API"
else:
# For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
send_dimensions = embedding_send_dim and has_embedding_dim_param
dimension_control = f"env_var={embedding_send_dim}"
if send_dimensions or not embedding_send_dim:
dimension_control = "by env var"
else:
dimension_control = "by not hasparam"
logger.info(
f"Embedding configuration: send_dimensions={send_dimensions} "
f"({dimension_control}, has_param={has_embedding_dim_param}, "
f"Send embedding dimension: {send_dimensions} {dimension_control} "
f"(dimensions={args.embedding_dim}, has_param={has_embedding_dim_param}, "
f"binding={args.embedding_binding})"
)

View file

@ -3425,10 +3425,10 @@ async def _perform_kg_search(
)
query_embedding = None
if query and (kg_chunk_pick_method == "VECTOR" or chunks_vdb):
embedding_func_config = text_chunks_db.embedding_func
if embedding_func_config and embedding_func_config.func:
actual_embedding_func = text_chunks_db.embedding_func
if actual_embedding_func:
try:
query_embedding = await embedding_func_config.func([query])
query_embedding = await actual_embedding_func([query])
query_embedding = query_embedding[
0
] # Extract first embedding from batch result
@ -4336,25 +4336,21 @@ async def _find_related_text_unit_from_entities(
num_of_chunks = int(max_related_chunks * len(entities_with_chunks) / 2)
# Get embedding function from global config
embedding_func_config = text_chunks_db.embedding_func
if not embedding_func_config:
actual_embedding_func = text_chunks_db.embedding_func
if not actual_embedding_func:
logger.warning("No embedding function found, falling back to WEIGHT method")
kg_chunk_pick_method = "WEIGHT"
else:
try:
actual_embedding_func = embedding_func_config.func
selected_chunk_ids = None
if actual_embedding_func:
selected_chunk_ids = await pick_by_vector_similarity(
query=query,
text_chunks_storage=text_chunks_db,
chunks_vdb=chunks_vdb,
num_of_chunks=num_of_chunks,
entity_info=entities_with_chunks,
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
selected_chunk_ids = await pick_by_vector_similarity(
query=query,
text_chunks_storage=text_chunks_db,
chunks_vdb=chunks_vdb,
num_of_chunks=num_of_chunks,
entity_info=entities_with_chunks,
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
if selected_chunk_ids == []:
kg_chunk_pick_method = "WEIGHT"
@ -4629,24 +4625,21 @@ async def _find_related_text_unit_from_relations(
num_of_chunks = int(max_related_chunks * len(relations_with_chunks) / 2)
# Get embedding function from global config
embedding_func_config = text_chunks_db.embedding_func
if not embedding_func_config:
actual_embedding_func = text_chunks_db.embedding_func
if not actual_embedding_func:
logger.warning("No embedding function found, falling back to WEIGHT method")
kg_chunk_pick_method = "WEIGHT"
else:
try:
actual_embedding_func = embedding_func_config.func
if actual_embedding_func:
selected_chunk_ids = await pick_by_vector_similarity(
query=query,
text_chunks_storage=text_chunks_db,
chunks_vdb=chunks_vdb,
num_of_chunks=num_of_chunks,
entity_info=relations_with_chunks,
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
selected_chunk_ids = await pick_by_vector_similarity(
query=query,
text_chunks_storage=text_chunks_db,
chunks_vdb=chunks_vdb,
num_of_chunks=num_of_chunks,
entity_info=relations_with_chunks,
embedding_func=actual_embedding_func,
query_embedding=query_embedding,
)
if selected_chunk_ids == []:
kg_chunk_pick_method = "WEIGHT"