Prohibit direct access to internal functions of EmbeddingFunc.
• Fix similarity search error in query stage
• Remove redundant null checks
• Improve log readability
This commit is contained in:
parent
ffeeae4208
commit
03cc6262c4
2 changed files with 32 additions and 36 deletions
|
|
@@ -723,15 +723,18 @@ def create_app(args):
|
|||
if args.embedding_binding == "jina":
|
||||
# Jina API requires dimension parameter - always send it
|
||||
send_dimensions = has_embedding_dim_param
|
||||
dimension_control = "forced (Jina API requirement)"
|
||||
dimension_control = "forced by Jina API"
|
||||
else:
|
||||
# For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
|
||||
send_dimensions = embedding_send_dim and has_embedding_dim_param
|
||||
dimension_control = f"env_var={embedding_send_dim}"
|
||||
if send_dimensions or not embedding_send_dim:
|
||||
dimension_control = "by env var"
|
||||
else:
|
||||
dimension_control = "by not hasparam"
|
||||
|
||||
logger.info(
|
||||
f"Embedding configuration: send_dimensions={send_dimensions} "
|
||||
f"({dimension_control}, has_param={has_embedding_dim_param}, "
|
||||
f"Send embedding dimension: {send_dimensions} {dimension_control} "
|
||||
f"(dimensions={args.embedding_dim}, has_param={has_embedding_dim_param}, "
|
||||
f"binding={args.embedding_binding})"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@@ -3425,10 +3425,10 @@ async def _perform_kg_search(
|
|||
)
|
||||
query_embedding = None
|
||||
if query and (kg_chunk_pick_method == "VECTOR" or chunks_vdb):
|
||||
embedding_func_config = text_chunks_db.embedding_func
|
||||
if embedding_func_config and embedding_func_config.func:
|
||||
actual_embedding_func = text_chunks_db.embedding_func
|
||||
if actual_embedding_func:
|
||||
try:
|
||||
query_embedding = await embedding_func_config.func([query])
|
||||
query_embedding = await actual_embedding_func([query])
|
||||
query_embedding = query_embedding[
|
||||
0
|
||||
] # Extract first embedding from batch result
|
||||
|
|
@@ -4336,25 +4336,21 @@ async def _find_related_text_unit_from_entities(
|
|||
num_of_chunks = int(max_related_chunks * len(entities_with_chunks) / 2)
|
||||
|
||||
# Get embedding function from global config
|
||||
embedding_func_config = text_chunks_db.embedding_func
|
||||
if not embedding_func_config:
|
||||
actual_embedding_func = text_chunks_db.embedding_func
|
||||
if not actual_embedding_func:
|
||||
logger.warning("No embedding function found, falling back to WEIGHT method")
|
||||
kg_chunk_pick_method = "WEIGHT"
|
||||
else:
|
||||
try:
|
||||
actual_embedding_func = embedding_func_config.func
|
||||
|
||||
selected_chunk_ids = None
|
||||
if actual_embedding_func:
|
||||
selected_chunk_ids = await pick_by_vector_similarity(
|
||||
query=query,
|
||||
text_chunks_storage=text_chunks_db,
|
||||
chunks_vdb=chunks_vdb,
|
||||
num_of_chunks=num_of_chunks,
|
||||
entity_info=entities_with_chunks,
|
||||
embedding_func=actual_embedding_func,
|
||||
query_embedding=query_embedding,
|
||||
)
|
||||
selected_chunk_ids = await pick_by_vector_similarity(
|
||||
query=query,
|
||||
text_chunks_storage=text_chunks_db,
|
||||
chunks_vdb=chunks_vdb,
|
||||
num_of_chunks=num_of_chunks,
|
||||
entity_info=entities_with_chunks,
|
||||
embedding_func=actual_embedding_func,
|
||||
query_embedding=query_embedding,
|
||||
)
|
||||
|
||||
if selected_chunk_ids == []:
|
||||
kg_chunk_pick_method = "WEIGHT"
|
||||
|
|
@@ -4629,24 +4625,21 @@ async def _find_related_text_unit_from_relations(
|
|||
num_of_chunks = int(max_related_chunks * len(relations_with_chunks) / 2)
|
||||
|
||||
# Get embedding function from global config
|
||||
embedding_func_config = text_chunks_db.embedding_func
|
||||
if not embedding_func_config:
|
||||
actual_embedding_func = text_chunks_db.embedding_func
|
||||
if not actual_embedding_func:
|
||||
logger.warning("No embedding function found, falling back to WEIGHT method")
|
||||
kg_chunk_pick_method = "WEIGHT"
|
||||
else:
|
||||
try:
|
||||
actual_embedding_func = embedding_func_config.func
|
||||
|
||||
if actual_embedding_func:
|
||||
selected_chunk_ids = await pick_by_vector_similarity(
|
||||
query=query,
|
||||
text_chunks_storage=text_chunks_db,
|
||||
chunks_vdb=chunks_vdb,
|
||||
num_of_chunks=num_of_chunks,
|
||||
entity_info=relations_with_chunks,
|
||||
embedding_func=actual_embedding_func,
|
||||
query_embedding=query_embedding,
|
||||
)
|
||||
selected_chunk_ids = await pick_by_vector_similarity(
|
||||
query=query,
|
||||
text_chunks_storage=text_chunks_db,
|
||||
chunks_vdb=chunks_vdb,
|
||||
num_of_chunks=num_of_chunks,
|
||||
entity_info=relations_with_chunks,
|
||||
embedding_func=actual_embedding_func,
|
||||
query_embedding=query_embedding,
|
||||
)
|
||||
|
||||
if selected_chunk_ids == []:
|
||||
kg_chunk_pick_method = "WEIGHT"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue