Rename function and variables for clarity in context building

- Rename _build_llm_context to _build_context_str
- Change text_units_context to chunks_context
- Move text_units_str and reference_list_str building before the early return
- Update log messages and comments
- Early return now also requires chunks_context to be empty (behavior change)
- Use consistent variable naming throughout
This commit is contained in:
yangdx 2025-11-01 12:15:24 +08:00
parent 9a8742da59
commit ec2ea4fd3f

View file

@ -3832,7 +3832,7 @@ async def _merge_all_chunks(
return merged_chunks return merged_chunks
async def _build_llm_context( async def _build_context_str(
entities_context: list[dict], entities_context: list[dict],
relations_context: list[dict], relations_context: list[dict],
merged_chunks: list[dict], merged_chunks: list[dict],
@ -3932,23 +3932,32 @@ async def _build_llm_context(
truncated_chunks truncated_chunks
) )
# Rebuild text_units_context with truncated chunks # Rebuild chunks_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
text_units_context = [] chunks_context = []
for i, chunk in enumerate(truncated_chunks): for i, chunk in enumerate(truncated_chunks):
text_units_context.append( chunks_context.append(
{ {
"reference_id": chunk["reference_id"], "reference_id": chunk["reference_id"],
"content": chunk["content"], "content": chunk["content"],
} }
) )
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
)
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
logger.info( logger.info(
f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks" f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(chunks_context)} chunks"
) )
# not necessary to use LLM to generate a response # not necessary to use LLM to generate a response
if not entities_context and not relations_context: if not entities_context and not relations_context and not chunks_context:
# Return empty raw data structure when no entities/relations # Return empty raw data structure when no entities/relations
empty_raw_data = convert_to_user_format( empty_raw_data = convert_to_user_format(
[], [],
@ -3979,15 +3988,6 @@ async def _build_llm_context(
if chunk_tracking_log: if chunk_tracking_log:
logger.info(f"Final chunks S+F/O: {' '.join(chunk_tracking_log)}") logger.info(f"Final chunks S+F/O: {' '.join(chunk_tracking_log)}")
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
)
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
result = kg_context_template.format( result = kg_context_template.format(
entities_str=entities_str, entities_str=entities_str,
relations_str=relations_str, relations_str=relations_str,
@ -3997,7 +3997,7 @@ async def _build_llm_context(
# Always return both context and complete data structure (unified approach) # Always return both context and complete data structure (unified approach)
logger.debug( logger.debug(
f"[_build_llm_context] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks" f"[_build_context_str] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks"
) )
final_data = convert_to_user_format( final_data = convert_to_user_format(
entities_context, entities_context,
@ -4009,7 +4009,7 @@ async def _build_llm_context(
relation_id_to_original, relation_id_to_original,
) )
logger.debug( logger.debug(
f"[_build_llm_context] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks" f"[_build_context_str] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks"
) )
return result, final_data return result, final_data
@ -4086,8 +4086,8 @@ async def _build_query_context(
return None return None
# Stage 4: Build final LLM context with dynamic token processing # Stage 4: Build final LLM context with dynamic token processing
# _build_llm_context now always returns tuple[str, dict] # _build_context_str now always returns tuple[str, dict]
context, raw_data = await _build_llm_context( context, raw_data = await _build_context_str(
entities_context=truncation_result["entities_context"], entities_context=truncation_result["entities_context"],
relations_context=truncation_result["relations_context"], relations_context=truncation_result["relations_context"],
merged_chunks=merged_chunks, merged_chunks=merged_chunks,
@ -4860,10 +4860,10 @@ async def naive_query(
"final_chunks_count": len(processed_chunks_with_ref_ids), "final_chunks_count": len(processed_chunks_with_ref_ids),
} }
# Build text_units_context from processed chunks with reference IDs # Build chunks_context from processed chunks with reference IDs
text_units_context = [] chunks_context = []
for i, chunk in enumerate(processed_chunks_with_ref_ids): for i, chunk in enumerate(processed_chunks_with_ref_ids):
text_units_context.append( chunks_context.append(
{ {
"reference_id": chunk["reference_id"], "reference_id": chunk["reference_id"],
"content": chunk["content"], "content": chunk["content"],
@ -4871,7 +4871,7 @@ async def naive_query(
) )
text_units_str = "\n".join( text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
) )
reference_list_str = "\n".join( reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}" f"[{ref['reference_id']}] {ref['file_path']}"